diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index e045170561d..3dcce68ab46 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -59,6 +59,9 @@ At a minimum, the following information should be added (but add more as needed) - [ ] Exclude: All with TSAN, MSAN, UBSAN, Coverage - [ ] Exclude: All with aarch64, release, debug --- +- [ ] Run only fuzzers related jobs (libFuzzer fuzzers, AST fuzzers, etc.) +- [ ] Exclude: AST fuzzers +--- - [ ] Do not test - [ ] Woolen Wolfdog - [ ] Upload binaries for special builds diff --git a/.github/actions/clean/action.yml b/.github/actions/clean/action.yml index 547738b17cc..8c22523cacf 100644 --- a/.github/actions/clean/action.yml +++ b/.github/actions/clean/action.yml @@ -1,11 +1,23 @@ name: Clean runner description: Clean the runner's temp path on ending +inputs: + images: + description: clean docker images + default: false + type: boolean runs: using: "composite" steps: - - name: Clean + - name: Clean Temp shell: bash run: | - docker ps --quiet | xargs --no-run-if-empty docker kill ||: - docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: - sudo rm -fr "${{runner.temp}}" + sudo rm -fr "${{runner.temp}}" + - name: Clean Docker Containers + shell: bash + run: | + docker rm -vf $(docker ps -aq) ||: + - name: Clean Docker Images + if: ${{ inputs.images }} + shell: bash + run: | + docker rmi -f $(docker images -aq) ||: diff --git a/.github/actions/debug/action.yml b/.github/actions/debug/action.yml new file mode 100644 index 00000000000..e1fe3f28024 --- /dev/null +++ b/.github/actions/debug/action.yml @@ -0,0 +1,18 @@ +name: DebugInfo +description: Prints workflow debug info + +runs: + using: "composite" + steps: + - name: Print envs + shell: bash + run: | + echo "::group::Envs" + env + echo "::endgroup::" + - name: Print Event.json + shell: bash + run: | + echo "::group::Event.json" + python3 -m json.tool "$GITHUB_EVENT_PATH" + echo "::endgroup::" diff --git a/.github/workflows/auto_releases.yml b/.github/workflows/auto_releases.yml new file mode 100644 index 00000000000..2fdf4e30a70 --- /dev/null +++ b/.github/workflows/auto_releases.yml @@ -0,0 +1,99 @@ +name: AutoReleases + +env: + PYTHONUNBUFFERED: 1 + +concurrency: + group: autoreleases + +on: + # schedule: + # - cron: '0 9 * * *' + workflow_dispatch: + inputs: + dry-run: + description: 'Dry run' + required: false + default: false + type: boolean + +jobs: + AutoReleaseInfo: + runs-on: [self-hosted, release-maker] + outputs: + data: ${{ steps.info.outputs.AUTO_RELEASE_PARAMS }} + dry_run: ${{ steps.info.outputs.DRY_RUN }} + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + ROBOT_CLICKHOUSE_SSH_KEY<> "$GITHUB_ENV" + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + fetch-depth: 0 # full history needed + - name: Debug Info + uses: ./.github/actions/debug + - name: Prepare Info + id: info + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 auto_release.py --prepare + echo "::group::Auto Release Info" + python3 -m json.tool /tmp/autorelease_info.json + echo "::endgroup::" + { + echo 'AUTO_RELEASE_PARAMS<> "$GITHUB_OUTPUT" + if [[ "${{ github.event_name }}" == "schedule" ]]; then + echo "DRY_RUN=true" >> "$GITHUB_OUTPUT" + else + echo "DRY_RUN=${{ github.event.inputs.dry-run }}" >> "$GITHUB_OUTPUT" + fi + - name: Post Release Branch statuses + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 auto_release.py --post-status + - name: Clean up + uses: ./.github/actions/clean + + Releases: + needs: AutoReleaseInfo + strategy: + matrix: + release_params: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases }} + max-parallel: 1 + name: Release ${{ matrix.release_params.release_branch }} + uses: ./.github/workflows/create_release.yml + with: + ref: ${{ matrix.release_params.commit_sha }} + type: patch + dry-run: ${{ fromJson(needs.AutoReleaseInfo.outputs.dry_run) }} + secrets: + ROBOT_CLICKHOUSE_COMMIT_TOKEN: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }} + + CleanUp: + needs: [Releases] + runs-on: [self-hosted, release-maker] + steps: + - uses: ./.github/actions/clean + with: + images: true + +# PostSlackMessage: +# needs: [Releases] +# runs-on: [self-hosted, release-maker] +# if: ${{ !cancelled() }} +# steps: +# - name: Check out repository code +# uses: ClickHouse/checkout@v1 +# - name: Post +# run: | +# cd "$GITHUB_WORKSPACE/tests/ci" +# python3 auto_release.py --post-auto-release-complete --wf-status ${{ job.status }} diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml index d4993b373df..1fb6cb60e96 100644 --- a/.github/workflows/create_release.yml +++ b/.github/workflows/create_release.yml @@ -2,6 +2,7 @@ name: CreateRelease concurrency: group: release + 'on': workflow_dispatch: inputs: @@ -26,6 +27,28 @@ concurrency: required: false default: false type: boolean + workflow_call: + inputs: + ref: + description: 'Git reference (branch or commit sha) from which to create the release' + required: true + type: string + type: + description: 'The type of release: "new" for a new release or "patch" for a patch release' + required: true + type: string + only-repo: + description: 'Run only repos updates including docker (repo-recovery, tests)' + required: false + default: false + type: boolean + dry-run: + description: 'Dry run' + required: false + default: false + type: boolean + secrets: + ROBOT_CLICKHOUSE_COMMIT_TOKEN: jobs: CreateRelease: @@ -101,6 +124,7 @@ jobs: --volume=".:/wd" --workdir="/wd" \ clickhouse/style-test \ ./tests/ci/changelog.py -v --debug-helpers \ + --gh-user-or-token ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }} \ --jobs=5 \ --output="./docs/changelogs/${{ env.RELEASE_TAG }}.md" ${{ env.RELEASE_TAG }} git add ./docs/changelogs/${{ env.RELEASE_TAG }}.md @@ -129,9 +153,9 @@ jobs: if: ${{ inputs.type == 'patch' && ! inputs.only-repo }} shell: bash run: | - python3 ./tests/ci/create_release.py --set-progress-completed git reset --hard HEAD git checkout "$GITHUB_REF_NAME" + python3 ./tests/ci/create_release.py --set-progress-completed - name: Create GH Release if: ${{ inputs.type == 'patch' && ! inputs.only-repo }} shell: bash diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index a5cd6321e8c..ec119b6ff95 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -130,6 +130,7 @@ jobs: with: build_name: package_debug data: ${{ needs.RunConfig.outputs.data }} + force: true BuilderBinDarwin: needs: [RunConfig, BuildDockers] if: ${{ !failure() && !cancelled() }} @@ -482,7 +483,7 @@ jobs: if: ${{ !failure() }} run: | # update overall ci report - python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} + python3 ./tests/ci/finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} - name: Check Workflow results if: ${{ !cancelled() }} run: | @@ -490,5 +491,4 @@ jobs: cat > "$WORKFLOW_RESULT_FILE" << 'EOF' ${{ toJson(needs) }} EOF - python3 ./tests/ci/ci_buddy.py --check-wf-status diff --git a/.gitmodules b/.gitmodules index 7fdfb1103c5..53ebde0cd3b 100644 --- a/.gitmodules +++ b/.gitmodules @@ -108,7 +108,7 @@ url = https://github.com/ClickHouse/icudata [submodule "contrib/icu"] path = contrib/icu - url = https://github.com/unicode-org/icu + url = https://github.com/ClickHouse/icu [submodule "contrib/flatbuffers"] path = contrib/flatbuffers url = https://github.com/ClickHouse/flatbuffers @@ -230,9 +230,6 @@ [submodule "contrib/minizip-ng"] path = contrib/minizip-ng url = https://github.com/zlib-ng/minizip-ng -[submodule "contrib/annoy"] - path = contrib/annoy - url = https://github.com/ClickHouse/annoy [submodule "contrib/qpl"] path = contrib/qpl url = https://github.com/intel/qpl @@ -348,9 +345,6 @@ [submodule "contrib/FP16"] path = contrib/FP16 url = https://github.com/Maratyszcza/FP16.git -[submodule "contrib/robin-map"] - path = contrib/robin-map - url = https://github.com/Tessil/robin-map.git [submodule "contrib/aklomp-base64"] path = contrib/aklomp-base64 url = https://github.com/aklomp/base64.git diff --git a/CHANGELOG.md b/CHANGELOG.md index 1793fd14ccd..93dcfa18999 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,5 @@ ### Table of Contents +**[ClickHouse release v24.8 LTS, 2024-08-20](#243)**
**[ClickHouse release v24.7, 2024-07-30](#247)**
**[ClickHouse release v24.6, 2024-07-01](#246)**
**[ClickHouse release v24.5, 2024-05-30](#245)**
@@ -10,6 +11,153 @@ # 2024 Changelog +### ClickHouse release 24.8 LTS, 2024-08-20 + +#### Backward Incompatible Change +* `clickhouse-client` and `clickhouse-local` now default to multi-query mode (instead single-query mode). As an example, `clickhouse-client -q "SELECT 1; SELECT 2"` now works, whereas users previously had to add `--multiquery` (or `-n`). The `--multiquery/-n` switch became obsolete. INSERT queries in multi-query statements are treated specially based on their FORMAT clause: If the FORMAT is `VALUES` (the most common case), the end of the INSERT statement is represented by a trailing semicolon `;` at the end of the query. For all other FORMATs (e.g. `CSV` or `JSONEachRow`), the end of the INSERT statement is represented by two newlines `\n\n` at the end of the query. [#63898](https://github.com/ClickHouse/ClickHouse/pull/63898) ([FFish](https://github.com/wxybear)). +* In previous versions, it was possible to use an alternative syntax for `LowCardinality` data types by appending `WithDictionary` to the name of the data type. It was an initial working implementation, and it was never documented or exposed to the public. Now, it is deprecated. If you have used this syntax, you have to ALTER your tables and rename the data types to `LowCardinality`. [#66842](https://github.com/ClickHouse/ClickHouse/pull/66842) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix logical errors with storage `Buffer` used with distributed destination table. It's a backward incompatible change: queries using `Buffer` with a distributed destination table may stop working if the table appears more than once in the query (e.g., in a self-join). [#67015](https://github.com/ClickHouse/ClickHouse/pull/67015) ([vdimir](https://github.com/vdimir)). +* In previous versions, calling functions for random distributions based on the Gamma function (such as Chi-Squared, Student, Fisher) with negative arguments close to zero led to a long computation or an infinite loop. In the new version, calling these functions with zero or negative arguments will produce an exception. This closes [#67297](https://github.com/ClickHouse/ClickHouse/issues/67297). [#67326](https://github.com/ClickHouse/ClickHouse/pull/67326) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The system table `text_log` is enabled by default. This is fully compatible with previous versions, but you may notice subtly increased disk usage on the local disk (this system table takes a tiny amount of disk space). [#67428](https://github.com/ClickHouse/ClickHouse/pull/67428) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* In previous versions, `arrayWithConstant` can be slow if asked to generate very large arrays. In the new version, it is limited to 1 GB per array. This closes [#32754](https://github.com/ClickHouse/ClickHouse/issues/32754). [#67741](https://github.com/ClickHouse/ClickHouse/pull/67741) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix REPLACE modifier formatting (forbid omitting brackets). [#67774](https://github.com/ClickHouse/ClickHouse/pull/67774) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#68349](https://github.com/ClickHouse/ClickHouse/issues/68349): Reimplement `Dynamic` type. Now when the limit of dynamic data types is reached new types are not casted to String but stored in a special data structure in binary format with binary encoded data type. Now any type ever inserted into `Dynamic` column can be read from it as subcolumn. [#68132](https://github.com/ClickHouse/ClickHouse/pull/68132) ([Kruglov Pavel](https://github.com/Avogar)). + +#### New Feature +* Added a new `MergeTree` setting `deduplicate_merge_projection_mode` to control the projections during merges (for specific engines) and `OPTIMIZE DEDUPLICATE` query. Supported options: `throw` (throw an exception in case the projection is not fully supported for *MergeTree engine), `drop` (remove projection during merge if it can't be merged itself consistently) and `rebuild` (rebuild projection from scratch, which is a heavy operation). [#66672](https://github.com/ClickHouse/ClickHouse/pull/66672) ([jsc0218](https://github.com/jsc0218)). +* Add `_etag` virtual column for S3 table engine. Fixes [#65312](https://github.com/ClickHouse/ClickHouse/issues/65312). [#65386](https://github.com/ClickHouse/ClickHouse/pull/65386) ([skyoct](https://github.com/skyoct)). +* Added a tagging (namespace) mechanism for the query cache. The same queries with different tags are considered different by the query cache. Example: `SELECT 1 SETTINGS use_query_cache = 1, query_cache_tag = 'abc'` and `SELECT 1 SETTINGS use_query_cache = 1, query_cache_tag = 'def'` now create different query cache entries. [#68235](https://github.com/ClickHouse/ClickHouse/pull/68235) ([sakulali](https://github.com/sakulali)). +* Support more variants of JOIN strictness (`LEFT/RIGHT SEMI/ANTI/ANY JOIN`) with inequality conditions which involve columns from both left and right table. e.g. `t1.y < t2.y` (see the setting `allow_experimental_join_condition`). [#64281](https://github.com/ClickHouse/ClickHouse/pull/64281) ([lgbo](https://github.com/lgbo-ustc)). +* Intrpret Hive-style partitioning for different engines (`File`, `URL`, `S3`, `AzureBlobStorage`, `HDFS`). Hive-style partitioning organizes data into partitioned sub-directories, making it efficient to query and manage large datasets. Currently, it only creates virtual columns with the appropriate name and data. The follow-up PR will introduce the appropriate data filtering (performance speedup). [#65997](https://github.com/ClickHouse/ClickHouse/pull/65997) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Add function `printf` for Spark compatiability (but you can use the existing `format` function). [#66257](https://github.com/ClickHouse/ClickHouse/pull/66257) ([李扬](https://github.com/taiyang-li)). +* Added a new server setting, `disable_insertion_and_mutation`. If it is enabled, the server will deny all insertions and mutations. This includes asynchronous INSERTs. This setting can be used to create read-only replicas. [#66519](https://github.com/ClickHouse/ClickHouse/pull/66519) ([Xu Jia](https://github.com/XuJia0210)). +* Add options `restore_replace_external_engines_to_null` and `restore_replace_external_table_functions_to_null` to replace external engines and table_engines to `Null` engine that can be useful for testing. It should work for RESTORE and explicit table creation. [#66536](https://github.com/ClickHouse/ClickHouse/pull/66536) ([Ilya Yatsishin](https://github.com/qoega)). +* Added support for reading `MULTILINESTRING` geometry in `WKT` format using function `readWKTLineString`. [#67647](https://github.com/ClickHouse/ClickHouse/pull/67647) ([Jacob Reckhard](https://github.com/jacobrec)). +* Add a new table function `fuzzQuery`. This function allows the modification of a given query string with random variations. Example: `SELECT query FROM fuzzQuery('SELECT 1') LIMIT 5;`. [#67655](https://github.com/ClickHouse/ClickHouse/pull/67655) ([pufit](https://github.com/pufit)). +* Add a query `ALTER TABLE ... DROP DETACHED PARTITION ALL` to drop all detached partitions. [#67885](https://github.com/ClickHouse/ClickHouse/pull/67885) ([Duc Canh Le](https://github.com/canhld94)). +* Add the `rows_before_aggregation_at_least` statistic to the query response when a new setting, `rows_before_aggregation` is enabled. This statistic represents the number of rows read before aggregation. In the context of a distributed query, when using the `group by` or `max` aggregation function without a `limit`, `rows_before_aggregation_at_least` can reflect the number of rows hit by the query. [#66084](https://github.com/ClickHouse/ClickHouse/pull/66084) ([morning-color](https://github.com/morning-color)). +* Support `OPTIMIZE` query on `Join` tables to reduce their memory footprint. [#67883](https://github.com/ClickHouse/ClickHouse/pull/67883) ([Duc Canh Le](https://github.com/canhld94)). +* Allow run query instantly in play if you add `&run=1` in the URL [#66457](https://github.com/ClickHouse/ClickHouse/pull/66457) ([Aleksandr Musorin](https://github.com/AVMusorin)). + +#### Experimental Feature +* Implement a new `JSON` data type. [#66444](https://github.com/ClickHouse/ClickHouse/pull/66444) ([Kruglov Pavel](https://github.com/Avogar)). +* Add the new `TimeSeries` table engine. [#64183](https://github.com/ClickHouse/ClickHouse/pull/64183) ([Vitaly Baranov](https://github.com/vitlibar)). +* Add new experimental `Kafka` storage engine to store offsets in Keeper instead of relying on committing them to Kafka. It makes the commit to ClickHouse tables atomic with regard to consumption from the queue. [#57625](https://github.com/ClickHouse/ClickHouse/pull/57625) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Use adaptive read task size calculation method (adaptive meaning it depends on read column sizes) for parallel replicas. [#60377](https://github.com/ClickHouse/ClickHouse/pull/60377) ([Nikita Taranov](https://github.com/nickitat)). +* Added statistics type `count_min` (count-min sketches) which provide selectivity estimations for equality predicates like `col = 'val'`. Supported data types are string, date, datatime and numeric types. [#65521](https://github.com/ClickHouse/ClickHouse/pull/65521) ([JackyWoo](https://github.com/JackyWoo)). + +#### Performance Improvement +* Setting `optimize_functions_to_subcolumns` is enabled by default. [#68053](https://github.com/ClickHouse/ClickHouse/pull/68053) ([Anton Popov](https://github.com/CurtizJ)). +* Store the `plain_rewritable` disk directory metadata in `__meta` layout, separately from the merge tree data in the object storage. Move the `plain_rewritable` disk to a flat directory structure. [#65751](https://github.com/ClickHouse/ClickHouse/pull/65751) ([Julia Kartseva](https://github.com/jkartseva)). +* Improve columns squashing (an operation happening in INSERT queries) for `String`/`Array`/`Map`/`Variant`/`Dynamic` types by reserving required memory in advance for all subcolumns. [#67043](https://github.com/ClickHouse/ClickHouse/pull/67043) ([Kruglov Pavel](https://github.com/Avogar)). +* Speed up `SYSTEM FLUSH LOGS` and flush logs on shutdown. [#67472](https://github.com/ClickHouse/ClickHouse/pull/67472) ([Sema Checherinda](https://github.com/CheSema)). +* Improved overall performance of merges by reducing the overhead of the scheduling steps of merges. [#68016](https://github.com/ClickHouse/ClickHouse/pull/68016) ([Anton Popov](https://github.com/CurtizJ)). +* Speed up tables removal for `DROP DATABASE` query, increased the default value for `database_catalog_drop_table_concurrency` to 16. [#67228](https://github.com/ClickHouse/ClickHouse/pull/67228) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Avoid allocating too much capacity for array column while writing ORC. Performance speeds up 15% for an Array column. [#67879](https://github.com/ClickHouse/ClickHouse/pull/67879) ([李扬](https://github.com/taiyang-li)). +* Speed up mutations for non-replicated MergeTree significantly [#66911](https://github.com/ClickHouse/ClickHouse/pull/66911) [#66909](https://github.com/ClickHouse/ClickHouse/pull/66909) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Improvement +* Setting `allow_experimental_analyzer` is renamed to `enable_analyzer`. The old name is preserved in a form of an alias. This signifies that Analyzer is no longer in beta and is fully promoted to production. [#66438](https://github.com/ClickHouse/ClickHouse/pull/66438) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Improve schema inference of date times. Now DateTime64 used only when date time has fractional part, otherwise regular DateTime is used. Inference of Date/DateTime is more strict now, especially when `date_time_input_format='best_effort'` to avoid inferring date times from strings in corner cases. [#68382](https://github.com/ClickHouse/ClickHouse/pull/68382) ([Kruglov Pavel](https://github.com/Avogar)). +* ClickHouse server now supports new setting `max_keep_alive_requests`. For keep-alive HTTP connections to the server it works in tandem with `keep_alive_timeout` - if idle timeout not expired but there already more than `max_keep_alive_requests` requests done through the given connection - it will be closed by the server. [#61793](https://github.com/ClickHouse/ClickHouse/pull/61793) ([Nikita Taranov](https://github.com/nickitat)). +* Various improvements in the advanced dashboard. This closes [#67697](https://github.com/ClickHouse/ClickHouse/issues/67697). This closes [#63407](https://github.com/ClickHouse/ClickHouse/issues/63407). This closes [#51129](https://github.com/ClickHouse/ClickHouse/issues/51129). This closes [#61204](https://github.com/ClickHouse/ClickHouse/issues/61204). [#67701](https://github.com/ClickHouse/ClickHouse/pull/67701) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Do not require a grant for REMOTE when creating a Distributed table: a grant for the Distributed engine is enough. [#65419](https://github.com/ClickHouse/ClickHouse/pull/65419) ([jsc0218](https://github.com/jsc0218)). +* Do not pass logs for keeper explicitly in the Docker image to allow overriding. [#65564](https://github.com/ClickHouse/ClickHouse/pull/65564) ([Azat Khuzhin](https://github.com/azat)). +* Introduced `use_same_password_for_base_backup` settings for `BACKUP` and `RESTORE` queries, allowing to create and restore incremental backups to/from password protected archives. [#66214](https://github.com/ClickHouse/ClickHouse/pull/66214) ([Samuele](https://github.com/sguerrini97)). +* Ignore `async_load_databases` for `ATTACH` query (previously it was possible for ATTACH to return before the tables had been attached). [#66240](https://github.com/ClickHouse/ClickHouse/pull/66240) ([Azat Khuzhin](https://github.com/azat)). +* Added logs and metrics for rejected connections (where there are not enough resources). [#66410](https://github.com/ClickHouse/ClickHouse/pull/66410) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Support proper `UUID` type for MongoDB engine. [#66671](https://github.com/ClickHouse/ClickHouse/pull/66671) ([Azat Khuzhin](https://github.com/azat)). +* Add replication lag and recovery time metrics. [#66703](https://github.com/ClickHouse/ClickHouse/pull/66703) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)). +* Add `DiskS3NoSuchKeyErrors` metric. [#66704](https://github.com/ClickHouse/ClickHouse/pull/66704) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)). +* Ensure the `COMMENT` clause works for all table engines. [#66832](https://github.com/ClickHouse/ClickHouse/pull/66832) ([Joe Lynch](https://github.com/joelynch)). +* Function `mapFromArrays` now accepts `Map(K, V)` as first argument, for example: `SELECT mapFromArrays(map('a', 4, 'b', 4), ['aa', 'bb'])` now works and returns `{('a',4):'aa',('b',4):'bb'}`. Also, if the 1st argument is an Array, it can now also be of type `Array(Nullable(T))` or `Array(LowCardinality(Nullable(T)))` as long as the actual array values are not `NULL`. [#67103](https://github.com/ClickHouse/ClickHouse/pull/67103) ([李扬](https://github.com/taiyang-li)). +* Read configuration for `clickhouse-local` from `~/.clickhouse-local`. [#67135](https://github.com/ClickHouse/ClickHouse/pull/67135) ([Azat Khuzhin](https://github.com/azat)). +* Rename setting `input_format_orc_read_use_writer_time_zone` to `input_format_orc_reader_timezone` and allow the user to set the reader timezone. [#67175](https://github.com/ClickHouse/ClickHouse/pull/67175) ([kevinyhzou](https://github.com/KevinyhZou)). +* Decrease level of the `Socket is not connected` error when HTTP connection immediately reset by peer after connecting, close [#34218](https://github.com/ClickHouse/ClickHouse/issues/34218). [#67177](https://github.com/ClickHouse/ClickHouse/pull/67177) ([vdimir](https://github.com/vdimir)). +* Add ability to load dashboards for `system.dashboards` from config (once set, they overrides the default dashboards preset). [#67232](https://github.com/ClickHouse/ClickHouse/pull/67232) ([Azat Khuzhin](https://github.com/azat)). +* The window functions in SQL are traditionally in snake case. ClickHouse uses `camelCase`, so new aliases `denseRank()` and `percentRank()` have been created. These new functions can be called the exact same as the original `dense_rank()` and `percent_rank()` functions. Both snake case and camelCase syntaxes remain usable. A new test for each of the functions has been added as well. This closes [#67042](https://github.com/ClickHouse/ClickHouse/issues/67042) . [#67334](https://github.com/ClickHouse/ClickHouse/pull/67334) ([Peter Nguyen](https://github.com/petern48)). +* Autodetect configuration file format if is not `.xml`, `.yml` or `.yaml`. If the file begins with < it might be XML, otherwise it might be YAML. It is useful when providing a configuration file from a pipe: `clickhouse-server --config-file <(echo "hello: world")`. [#67391](https://github.com/ClickHouse/ClickHouse/pull/67391) ([sakulali](https://github.com/sakulali)). +* Functions `formatDateTime` and `formatDateTimeInJodaSyntax` now treat their format parameter as optional. If it is not specified, format strings `%Y-%m-%d %H:%i:%s` and `yyyy-MM-dd HH:mm:ss` are assumed. Example: `SELECT parseDateTime('2021-01-04 23:12:34')` now returns DateTime value `2021-01-04 23:12:34` (previously, this threw an exception). [#67399](https://github.com/ClickHouse/ClickHouse/pull/67399) ([Robert Schulze](https://github.com/rschu1ze)). +* Automatically retry Keeper requests in KeeperMap if they happen because of timeout or connection loss. [#67448](https://github.com/ClickHouse/ClickHouse/pull/67448) ([Antonio Andelic](https://github.com/antonio2368)). +* Add `-no-pie` to Aarch64 Linux builds to allow proper introspection and symbolizing of stacktraces after a ClickHouse restart. [#67916](https://github.com/ClickHouse/ClickHouse/pull/67916) ([filimonov](https://github.com/filimonov)). +* Added profile events for merges and mutations for better introspection. [#68015](https://github.com/ClickHouse/ClickHouse/pull/68015) ([Anton Popov](https://github.com/CurtizJ)). +* Fix settings and `current_database` in `system.processes` for async BACKUP/RESTORE. [#68163](https://github.com/ClickHouse/ClickHouse/pull/68163) ([Azat Khuzhin](https://github.com/azat)). +* Remove unnecessary logs for non-replicated `MergeTree`. [#68238](https://github.com/ClickHouse/ClickHouse/pull/68238) ([Daniil Ivanik](https://github.com/divanik)). + +#### Build/Testing/Packaging Improvement +* Integration tests flaky check will not run each test case multiple times to find more issues in tests and make them more reliable. It is using `pytest-repeat` library to run test case multiple times for the same environment. It is important to cleanup tables and other entities in the end of a test case to pass. Repeating works much faster than several pytest runs as it starts necessary containers only once. [#66986](https://github.com/ClickHouse/ClickHouse/pull/66986) ([Ilya Yatsishin](https://github.com/qoega)). +* Unblock the usage of CLion with ClickHouse. In previous versions, CLion freezed for a minute on every keypress. This closes [#66994](https://github.com/ClickHouse/ClickHouse/issues/66994). [#66995](https://github.com/ClickHouse/ClickHouse/pull/66995) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* getauxval: avoid a crash under a sanitizer re-exec due to high ASLR entropy in newer Linux kernels. [#67081](https://github.com/ClickHouse/ClickHouse/pull/67081) ([Raúl Marín](https://github.com/Algunenano)). +* Some parts of client code are extracted to a single file and highest possible level optimization is applied to them even for debug builds. This closes: [#65745](https://github.com/ClickHouse/ClickHouse/issues/65745). [#67215](https://github.com/ClickHouse/ClickHouse/pull/67215) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). + +#### Bug Fix +* Only relevant to the experimental Variant data type. Fix crash with Variant + AggregateFunction type. [#67122](https://github.com/ClickHouse/ClickHouse/pull/67122) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix crash in DistributedAsyncInsert when connection is empty. [#67219](https://github.com/ClickHouse/ClickHouse/pull/67219) ([Pablo Marcos](https://github.com/pamarcos)). +* Fix crash of `uniq` and `uniqTheta ` with `tuple()` argument. Closes [#67303](https://github.com/ClickHouse/ClickHouse/issues/67303). [#67306](https://github.com/ClickHouse/ClickHouse/pull/67306) ([flynn](https://github.com/ucasfl)). +* Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)). +* Fix potential stack overflow in `JSONMergePatch` function. Renamed this function from `jsonMergePatch` to `JSONMergePatch` because the previous name was wrong. The previous name is still kept for compatibility. Improved diagnostic of errors in the function. This closes [#67304](https://github.com/ClickHouse/ClickHouse/issues/67304). [#67756](https://github.com/ClickHouse/ClickHouse/pull/67756) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fixed a NULL pointer dereference, triggered by a specially crafted query, that crashed the server via hopEnd, hopStart, tumbleEnd, and tumbleStart. [#68098](https://github.com/ClickHouse/ClickHouse/pull/68098) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Fixed `Not-ready Set` in some system tables when filtering using subqueries. [#66018](https://github.com/ClickHouse/ClickHouse/pull/66018) ([Michael Kolupaev](https://github.com/al13n321)). +* Fixed reading of subcolumns after `ALTER ADD COLUMN` query. [#66243](https://github.com/ClickHouse/ClickHouse/pull/66243) ([Anton Popov](https://github.com/CurtizJ)). +* Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)). +* Fix formatting of query with aliased JOIN ON expression, e.g. `... JOIN t2 ON (x = y) AS e ORDER BY x` should be formatted as `... JOIN t2 ON ((x = y) AS e) ORDER BY x`. [#66312](https://github.com/ClickHouse/ClickHouse/pull/66312) ([vdimir](https://github.com/vdimir)). +* Fix cluster() for inter-server secret (preserve initial user as before). [#66364](https://github.com/ClickHouse/ClickHouse/pull/66364) ([Azat Khuzhin](https://github.com/azat)). +* Fix possible runtime error while converting Array field with nulls to Array(Variant). [#66727](https://github.com/ClickHouse/ClickHouse/pull/66727) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix for occasional deadlock in Context::getDDLWorker. [#66843](https://github.com/ClickHouse/ClickHouse/pull/66843) ([Alexander Gololobov](https://github.com/davenger)). +* Fix creating KeeperMap table after an incomplete drop. [#66865](https://github.com/ClickHouse/ClickHouse/pull/66865) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix broken part error while restoring to a `s3_plain_rewritable` disk. [#66881](https://github.com/ClickHouse/ClickHouse/pull/66881) ([Vitaly Baranov](https://github.com/vitlibar)). +* In rare cases ClickHouse could consider parts as broken because of some unexpected projections on disk. Now it's fixed. [#66898](https://github.com/ClickHouse/ClickHouse/pull/66898) ([alesapin](https://github.com/alesapin)). +* Fix invalid format detection in schema inference that could lead to logical error Format {} doesn't support schema inference. [#66899](https://github.com/ClickHouse/ClickHouse/pull/66899) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix possible deadlock on query cancel with parallel replicas. [#66905](https://github.com/ClickHouse/ClickHouse/pull/66905) ([Nikita Taranov](https://github.com/nickitat)). +* Forbid create as select even when database_replicated_allow_heavy_create is set. It was unconditionally forbidden in 23.12 and accidentally allowed under the setting in unreleased 24.7. [#66980](https://github.com/ClickHouse/ClickHouse/pull/66980) ([vdimir](https://github.com/vdimir)). +* Reading from the `numbers` could wrongly throw an exception when the `max_rows_to_read` limit was set. This closes [#66992](https://github.com/ClickHouse/ClickHouse/issues/66992). [#66996](https://github.com/ClickHouse/ClickHouse/pull/66996) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add proper type conversion to lagInFrame and leadInFrame window functions - fixes msan test. [#67091](https://github.com/ClickHouse/ClickHouse/pull/67091) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* TRUNCATE DATABASE used to stop replication as if it was a DROP DATABASE query, it's fixed. [#67129](https://github.com/ClickHouse/ClickHouse/pull/67129) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Use a separate client context in `clickhouse-local`. [#67133](https://github.com/ClickHouse/ClickHouse/pull/67133) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix error `Cannot convert column because it is non constant in source stream but must be constant in result.` for a query that reads from the `Merge` table over the `Distriburted` table with one shard. [#67146](https://github.com/ClickHouse/ClickHouse/pull/67146) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Correct behavior of `ORDER BY all` with disabled `enable_order_by_all` and parallel replicas (distributed queries as well). [#67153](https://github.com/ClickHouse/ClickHouse/pull/67153) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix wrong usage of input_format_max_bytes_to_read_for_schema_inference in schema cache. [#67157](https://github.com/ClickHouse/ClickHouse/pull/67157) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix the memory leak for count distinct, when exception issued during group by single nullable key. [#67171](https://github.com/ClickHouse/ClickHouse/pull/67171) ([Jet He](https://github.com/compasses)). +* Fix an error in optimization which converts OUTER JOIN to INNER JOIN. This closes [#67156](https://github.com/ClickHouse/ClickHouse/issues/67156). This closes [#66447](https://github.com/ClickHouse/ClickHouse/issues/66447). The bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/62907. [#67178](https://github.com/ClickHouse/ClickHouse/pull/67178) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix error `Conversion from AggregateFunction(name, Type) to AggregateFunction(name, Nullable(Type)) is not supported`. The bug was caused by the `optimize_rewrite_aggregate_function_with_if` optimization. Fixes [#67112](https://github.com/ClickHouse/ClickHouse/issues/67112). [#67229](https://github.com/ClickHouse/ClickHouse/pull/67229) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix hung query when using empty tuple as lhs of function IN. [#67295](https://github.com/ClickHouse/ClickHouse/pull/67295) ([Duc Canh Le](https://github.com/canhld94)). +* It was possible to create a very deep nested JSON data that triggered stack overflow while skipping unknown fields. This closes [#67292](https://github.com/ClickHouse/ClickHouse/issues/67292). [#67324](https://github.com/ClickHouse/ClickHouse/pull/67324) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix attaching ReplicatedMergeTree table after exception during startup. [#67360](https://github.com/ClickHouse/ClickHouse/pull/67360) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix segfault caused by incorrectly detaching from thread group in `Aggregator`. [#67385](https://github.com/ClickHouse/ClickHouse/pull/67385) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix one more case when a non-deterministic function is specified in PK. [#67395](https://github.com/ClickHouse/ClickHouse/pull/67395) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed `bloom_filter` index breaking queries with mildly weird conditions like `(k=2)=(k=2)` or `has([1,2,3], k)`. [#67423](https://github.com/ClickHouse/ClickHouse/pull/67423) ([Michael Kolupaev](https://github.com/al13n321)). +* Correctly parse file name/URI containing `::` if it's not an archive. [#67433](https://github.com/ClickHouse/ClickHouse/pull/67433) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix wait for tasks in ~WriteBufferFromS3 in case WriteBuffer was cancelled. [#67459](https://github.com/ClickHouse/ClickHouse/pull/67459) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Protect temporary part directories from removing during RESTORE. [#67491](https://github.com/ClickHouse/ClickHouse/pull/67491) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix execution of nested short-circuit functions. [#67520](https://github.com/ClickHouse/ClickHouse/pull/67520) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix `Logical error: Expected the argument №N of type T to have X rows, but it has 0`. The error could happen in a remote query with constant expression in `GROUP BY` (with a new analyzer). [#67536](https://github.com/ClickHouse/ClickHouse/pull/67536) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix join on tuple with NULLs: Some queries with the new analyzer and `NULL` inside the tuple in the `JOIN ON` section returned incorrect results. [#67538](https://github.com/ClickHouse/ClickHouse/pull/67538) ([vdimir](https://github.com/vdimir)). +* Fix redundant reschedule of FileCache::freeSpaceRatioKeepingThreadFunc() in case of full non-evictable cache. [#67540](https://github.com/ClickHouse/ClickHouse/pull/67540) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix inserting into stream like engines (Kafka, RabbitMQ, NATS) through HTTP interface. [#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fix for function `toStartOfWeek` which returned the wrong result with a small `DateTime64` value. [#67558](https://github.com/ClickHouse/ClickHouse/pull/67558) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Fix creation of view with recursive CTE. [#67587](https://github.com/ClickHouse/ClickHouse/pull/67587) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix `Logical error: 'file_offset_of_buffer_end <= read_until_position'` in filesystem cache. Closes [#57508](https://github.com/ClickHouse/ClickHouse/issues/57508). [#67623](https://github.com/ClickHouse/ClickHouse/pull/67623) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fixes [#62282](https://github.com/ClickHouse/ClickHouse/issues/62282). Removed the call to `convertFieldToString()` and added datatype specific serialization code. Parameterized view substitution was broken for multiple datatypes when parameter value was a function or expression returning datatype instance. [#67654](https://github.com/ClickHouse/ClickHouse/pull/67654) ([Shankar](https://github.com/shiyer7474)). +* Fix crash on `percent_rank`. `percent_rank`'s default frame type is changed to `range unbounded preceding and unbounded following`. `IWindowFunction`'s default window frame is considered and now window functions without window frame definition in sql can be put into different `WindowTransfomer`s properly. [#67661](https://github.com/ClickHouse/ClickHouse/pull/67661) ([lgbo](https://github.com/lgbo-ustc)). +* Fix reloading SQL UDFs with UNION. Previously, restarting the server could make UDF invalid. [#67665](https://github.com/ClickHouse/ClickHouse/pull/67665) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix possible logical error "Unexpected return type from if" with experimental Variant type and enabled setting `use_variant_as_common_type ` in function if with Tuples and Maps. [#67687](https://github.com/ClickHouse/ClickHouse/pull/67687) ([Kruglov Pavel](https://github.com/Avogar)). +* Due to a bug in Linux Kernel, a query can hung in `TimerDescriptor::drain`. This closes [#37686](https://github.com/ClickHouse/ClickHouse/issues/37686). [#67702](https://github.com/ClickHouse/ClickHouse/pull/67702) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix completion of `RESTORE ON CLUSTER` command. [#67720](https://github.com/ClickHouse/ClickHouse/pull/67720) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix dictionary hang in case of CANNOT_SCHEDULE_TASK while loading. [#67751](https://github.com/ClickHouse/ClickHouse/pull/67751) ([Azat Khuzhin](https://github.com/azat)). +* Queries like `SELECT count() FROM t WHERE cast(c = 1 or c = 9999 AS Bool) SETTINGS use_skip_indexes=1` with bloom filter indexes on `c` now work correctly. [#67781](https://github.com/ClickHouse/ClickHouse/pull/67781) ([jsc0218](https://github.com/jsc0218)). +* Fix wrong aggregation result in some queries with aggregation without keys and filter, close [#67419](https://github.com/ClickHouse/ClickHouse/issues/67419). [#67804](https://github.com/ClickHouse/ClickHouse/pull/67804) ([vdimir](https://github.com/vdimir)). +* Validate experimental/suspicious data types in ALTER ADD/MODIFY COLUMN. [#67911](https://github.com/ClickHouse/ClickHouse/pull/67911) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix DateTime64 parsing after constant folding in distributed queries, close [#66773](https://github.com/ClickHouse/ClickHouse/issues/66773). [#67920](https://github.com/ClickHouse/ClickHouse/pull/67920) ([vdimir](https://github.com/vdimir)). +* Fix wrong `count()` result when there is non-deterministic function in predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)). +* Now ClickHouse doesn't consider part as broken if projection doesn't exist on disk but exists in `checksums.txt`. [#68003](https://github.com/ClickHouse/ClickHouse/pull/68003) ([alesapin](https://github.com/alesapin)). +* Fixed skipping of untouched parts in mutations with new analyzer. Previously with enabled analyzer data in part could be rewritten by mutation even if mutation doesn't affect this part according to predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)). +* Removes an incorrect optimization to remove sorting in subqueries that use `OFFSET`. Fixes [#67906](https://github.com/ClickHouse/ClickHouse/issues/67906). [#68099](https://github.com/ClickHouse/ClickHouse/pull/68099) ([Graham Campbell](https://github.com/GrahamCampbell)). +* Attempt to fix `Block structure mismatch in AggregatingStep stream: different types` for aggregate projection optimization. [#68107](https://github.com/ClickHouse/ClickHouse/pull/68107) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Try fix postgres crash when query is cancelled. [#68288](https://github.com/ClickHouse/ClickHouse/pull/68288) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix missing sync replica mode in query `SYSTEM SYNC REPLICA`. [#68326](https://github.com/ClickHouse/ClickHouse/pull/68326) ([Duc Canh Le](https://github.com/canhld94)). + + ### ClickHouse release 24.7, 2024-07-30 #### Backward Incompatible Change diff --git a/CMakeLists.txt b/CMakeLists.txt index f796e6c4616..6abf48a6927 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -187,14 +187,6 @@ else () set(NO_WHOLE_ARCHIVE --no-whole-archive) endif () -if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE") - # Can be lld or ld-lld or lld-13 or /path/to/lld. - if (LINKER_NAME MATCHES "lld") - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gdb-index") - message (STATUS "Adding .gdb-index via --gdb-index linker option.") - endif () -endif() - if (NOT (SANITIZE_COVERAGE OR WITH_COVERAGE) AND (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" @@ -330,17 +322,21 @@ if (DISABLE_OMIT_FRAME_POINTER) set (CMAKE_ASM_FLAGS_ADD "${CMAKE_ASM_FLAGS_ADD} -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer") endif() +# Before you start hating your debugger because it refuses to show variables (''), try building with -DDEBUG_O_LEVEL="0" +# https://stackoverflow.com/questions/63386189/whats-the-difference-between-a-compilers-o0-option-and-og-option/63386263#63386263 +set(DEBUG_O_LEVEL "g" CACHE STRING "The -Ox level used for debug builds") + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COMPILER_FLAGS} ${CMAKE_CXX_FLAGS_ADD}") set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3 ${DEBUG_INFO_FLAGS} ${CMAKE_CXX_FLAGS_ADD}") -set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Og ${DEBUG_INFO_FLAGS} ${CMAKE_CXX_FLAGS_ADD}") +set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O${DEBUG_O_LEVEL} ${DEBUG_INFO_FLAGS} ${CMAKE_CXX_FLAGS_ADD}") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMPILER_FLAGS} ${CMAKE_C_FLAGS_ADD}") set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -O3 ${DEBUG_INFO_FLAGS} ${CMAKE_C_FLAGS_ADD}") -set (CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -Og ${DEBUG_INFO_FLAGS} ${CMAKE_C_FLAGS_ADD}") +set (CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O${DEBUG_O_LEVEL} ${DEBUG_INFO_FLAGS} ${CMAKE_C_FLAGS_ADD}") set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${COMPILER_FLAGS} ${CMAKE_ASM_FLAGS_ADD}") set (CMAKE_ASM_FLAGS_RELWITHDEBINFO "${CMAKE_ASM_FLAGS_RELWITHDEBINFO} -O3 ${DEBUG_INFO_FLAGS} ${CMAKE_ASM_FLAGS_ADD}") -set (CMAKE_ASM_FLAGS_DEBUG "${CMAKE_ASM_FLAGS_DEBUG} -Og ${DEBUG_INFO_FLAGS} ${CMAKE_ASM_FLAGS_ADD}") +set (CMAKE_ASM_FLAGS_DEBUG "${CMAKE_ASM_FLAGS_DEBUG} -O${DEBUG_O_LEVEL} ${DEBUG_INFO_FLAGS} ${CMAKE_ASM_FLAGS_ADD}") if (OS_DARWIN) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") @@ -402,7 +398,7 @@ if ((NOT OS_LINUX AND NOT OS_ANDROID) OR (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") set(ENABLE_GWP_ASAN OFF) endif () -option (ENABLE_FIU "Enable Fiu" ON) +option (ENABLE_LIBFIU "Enable libfiu" ON) option(WERROR "Enable -Werror compiler option" ON) @@ -428,12 +424,17 @@ if (NOT SANITIZE) set (CMAKE_POSITION_INDEPENDENT_CODE OFF) endif() -if (OS_LINUX AND NOT (ARCH_AARCH64 OR ARCH_S390X) AND NOT SANITIZE) - # Slightly more efficient code can be generated - # It's disabled for ARM because otherwise ClickHouse cannot run on Android. +if (NOT OS_ANDROID AND OS_LINUX AND NOT ARCH_S390X AND NOT SANITIZE) + # Using '-no-pie' builds executables with fixed addresses, resulting in slightly more efficient code + # and keeping binary addresses constant even with ASLR enabled. + # Disabled on Android as it requires PIE: https://source.android.com/docs/security/enhancements#android-5 + # Disabled on IBM S390X due to build issues with 'no-pie' + # Disabled with sanitizers to avoid issues with maximum relocation size: https://github.com/ClickHouse/ClickHouse/pull/49145 set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fno-pie") set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -fno-pie") set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -no-pie -Wl,-no-pie") +else () + message (WARNING "ClickHouse is built as PIE, system.trace_log will contain invalid addresses after server restart.") endif () if (ENABLE_TESTS) @@ -604,7 +605,9 @@ if (NATIVE_BUILD_TARGETS execute_process( COMMAND ${CMAKE_COMMAND} -E make_directory "${NATIVE_BUILD_DIR}" - COMMAND_ECHO STDOUT) + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) execute_process( COMMAND ${CMAKE_COMMAND} @@ -616,9 +619,13 @@ if (NATIVE_BUILD_TARGETS "-DENABLE_CLICKHOUSE_SELF_EXTRACTING=${ENABLE_CLICKHOUSE_SELF_EXTRACTING}" ${PROJECT_SOURCE_DIR} WORKING_DIRECTORY "${NATIVE_BUILD_DIR}" - COMMAND_ECHO STDOUT) + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) execute_process( COMMAND ${CMAKE_COMMAND} --build "${NATIVE_BUILD_DIR}" --target ${NATIVE_BUILD_TARGETS} - COMMAND_ECHO STDOUT) + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) endif () diff --git a/PreLoad.cmake b/PreLoad.cmake index e0fd37b2fd6..92b221c9f63 100644 --- a/PreLoad.cmake +++ b/PreLoad.cmake @@ -51,8 +51,14 @@ if (NOT "$ENV{CFLAGS}" STREQUAL "" endif() # Default toolchain - this is needed to avoid dependency on OS files. -execute_process(COMMAND uname -s OUTPUT_VARIABLE OS) -execute_process(COMMAND uname -m OUTPUT_VARIABLE ARCH) +execute_process(COMMAND uname -s + OUTPUT_VARIABLE OS + COMMAND_ERROR_IS_FATAL ANY +) +execute_process(COMMAND uname -m + OUTPUT_VARIABLE ARCH + COMMAND_ERROR_IS_FATAL ANY +) # By default, prefer clang on Linux # But note, that you still may change the compiler with -DCMAKE_C_COMPILER/-DCMAKE_CXX_COMPILER. diff --git a/README.md b/README.md index 2120a4d1211..ba212852ea8 100644 --- a/README.md +++ b/README.md @@ -34,17 +34,32 @@ curl https://clickhouse.com/ | sh Every month we get together with the community (users, contributors, customers, those interested in learning more about ClickHouse) to discuss what is coming in the latest release. If you are interested in sharing what you've built on ClickHouse, let us know. -* [v24.8 Community Call](https://clickhouse.com/company/events/v24-8-community-release-call) - August 29 +* [v24.9 Community Call](https://clickhouse.com/company/events/v24-9-community-release-call) - September 26 ## Upcoming Events Keep an eye out for upcoming meetups and events around the world. Somewhere else you want us to be? Please feel free to reach out to tyler `` clickhouse `` com. You can also peruse [ClickHouse Events](https://clickhouse.com/company/news-events) for a list of all upcoming trainings, meetups, speaking engagements, etc. -* MORE COMING SOON! +The following upcoming meetups are featuring creator of ClickHouse & CTO, Alexey Milovidov: + +* [ClickHouse Guangzhou User Group Meetup](https://mp.weixin.qq.com/s/GSvo-7xUoVzCsuUvlLTpCw) - August 25 +* [San Francisco Meetup (Cloudflare)](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/302540575) - September 5 +* [Raleigh Meetup (Deutsche Bank)](https://www.meetup.com/triangletechtalks/events/302723486/) - September 9 +* [New York Meetup (Rokt)](https://www.meetup.com/clickhouse-new-york-user-group/events/302575342) - September 10 +* [Chicago Meetup (Jump Capital)](https://lu.ma/43tvmrfw) - September 12 + +Other upcoming meetups +* [Seattle Meetup (Statsig)](https://www.meetup.com/clickhouse-seattle-user-group/events/302518075/) - August 27 +* [Melbourne Meetup](https://www.meetup.com/clickhouse-australia-user-group/events/302732666/) - August 27 +* [Sydney Meetup](https://www.meetup.com/clickhouse-australia-user-group/events/302862966/) - September 5 +* [Zurich Meetup](https://www.meetup.com/clickhouse-switzerland-meetup-group/events/302267429/) - September 5 +* [Toronto Meetup (Shopify)](https://www.meetup.com/clickhouse-toronto-user-group/events/301490855/) - September 10 +* [Austin Meetup](https://www.meetup.com/clickhouse-austin-user-group/events/302558689/) - September 17 +* [London Meetup](https://www.meetup.com/clickhouse-london-user-group/events/302977267) - September 17 ## Recent Recordings * **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Current featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments" -* **Recording available**: [**v24.4 Release Call**](https://www.youtube.com/watch?v=dtUqgcfOGmE) All the features of 24.4, one convenient video! Watch it now! +* **Recording available**: [**v24.8 LTS Release Call**](https://www.youtube.com/watch?v=AeLmp2jc51k) All the features of 24.8 LTS, one convenient video! Watch it now! ## Interested in joining ClickHouse and making it your full-time job? diff --git a/SECURITY.md b/SECURITY.md index 8930dc96f8a..93c48f1d9ba 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -14,25 +14,15 @@ The following versions of ClickHouse server are currently supported with securit | Version | Supported | |:-|:-| +| 24.8 | ✔️ | | 24.7 | ✔️ | | 24.6 | ✔️ | -| 24.5 | ✔️ | +| 24.5 | ❌ | | 24.4 | ❌ | | 24.3 | ✔️ | | 24.2 | ❌ | | 24.1 | ❌ | -| 23.12 | ❌ | -| 23.11 | ❌ | -| 23.10 | ❌ | -| 23.9 | ❌ | -| 23.8 | ✔️ | -| 23.7 | ❌ | -| 23.6 | ❌ | -| 23.5 | ❌ | -| 23.4 | ❌ | -| 23.3 | ❌ | -| 23.2 | ❌ | -| 23.1 | ❌ | +| 23.* | ❌ | | 22.* | ❌ | | 21.* | ❌ | | 20.* | ❌ | diff --git a/base/base/CMakeLists.txt b/base/base/CMakeLists.txt index 341c92d3042..3d236f52c36 100644 --- a/base/base/CMakeLists.txt +++ b/base/base/CMakeLists.txt @@ -1,4 +1,4 @@ -add_compile_options($<$,$>:${COVERAGE_FLAGS}>) +add_compile_options("$<$,$>:${COVERAGE_FLAGS}>") if (USE_CLANG_TIDY) set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}") @@ -8,6 +8,8 @@ endif () # when instantiated from JSON.cpp. Try again when libcxx(abi) and Clang are upgraded to 16. set (CMAKE_CXX_STANDARD 20) +configure_file(GitHash.cpp.in GitHash.generated.cpp) + set (SRCS argsToConfig.cpp cgroupsv2.cpp @@ -33,6 +35,7 @@ set (SRCS safeExit.cpp throwError.cpp Numa.cpp + GitHash.generated.cpp ) add_library (common ${SRCS}) diff --git a/src/Daemon/GitHash.cpp.in b/base/base/GitHash.cpp.in similarity index 100% rename from src/Daemon/GitHash.cpp.in rename to base/base/GitHash.cpp.in diff --git a/base/base/cgroupsv2.cpp b/base/base/cgroupsv2.cpp index 87f62bf377d..e0e37c8729b 100644 --- a/base/base/cgroupsv2.cpp +++ b/base/base/cgroupsv2.cpp @@ -27,27 +27,6 @@ bool cgroupsV2Enabled() #endif } -bool cgroupsV2MemoryControllerEnabled() -{ -#if defined(OS_LINUX) - chassert(cgroupsV2Enabled()); - /// According to https://docs.kernel.org/admin-guide/cgroup-v2.html, file "cgroup.controllers" defines which controllers are available - /// for the current + child cgroups. The set of available controllers can be restricted from level to level using file - /// "cgroups.subtree_control". It is therefore sufficient to check the bottom-most nested "cgroup.controllers" file. - fs::path cgroup_dir = cgroupV2PathOfProcess(); - if (cgroup_dir.empty()) - return false; - std::ifstream controllers_file(cgroup_dir / "cgroup.controllers"); - if (!controllers_file.is_open()) - return false; - std::string controllers; - std::getline(controllers_file, controllers); - return controllers.find("memory") != std::string::npos; -#else - return false; -#endif -} - fs::path cgroupV2PathOfProcess() { #if defined(OS_LINUX) @@ -71,3 +50,28 @@ fs::path cgroupV2PathOfProcess() return {}; #endif } + +std::optional getCgroupsV2PathContainingFile([[maybe_unused]] std::string_view file_name) +{ +#if defined(OS_LINUX) + if (!cgroupsV2Enabled()) + return {}; + + fs::path current_cgroup = cgroupV2PathOfProcess(); + if (current_cgroup.empty()) + return {}; + + /// Return the bottom-most nested file. If there is no such file at the current + /// level, try again at the parent level as settings are inherited. + while (current_cgroup != default_cgroups_mount.parent_path()) + { + const auto path = current_cgroup / file_name; + if (fs::exists(path)) + return {current_cgroup}; + current_cgroup = current_cgroup.parent_path(); + } + return {}; +#else + return {}; +#endif +} diff --git a/base/base/cgroupsv2.h b/base/base/cgroupsv2.h index cfb916ff358..a6276474254 100644 --- a/base/base/cgroupsv2.h +++ b/base/base/cgroupsv2.h @@ -1,6 +1,7 @@ #pragma once #include +#include #if defined(OS_LINUX) /// I think it is possible to mount the cgroups hierarchy somewhere else (e.g. when in containers). @@ -11,11 +12,11 @@ static inline const std::filesystem::path default_cgroups_mount = "/sys/fs/cgrou /// Is cgroups v2 enabled on the system? bool cgroupsV2Enabled(); -/// Is the memory controller of cgroups v2 enabled on the system? -/// Assumes that cgroupsV2Enabled() is enabled. -bool cgroupsV2MemoryControllerEnabled(); - /// Detects which cgroup v2 the process belongs to and returns the filesystem path to the cgroup. /// Returns an empty path the cgroup cannot be determined. /// Assumes that cgroupsV2Enabled() is enabled. std::filesystem::path cgroupV2PathOfProcess(); + +/// Returns the most nested cgroup dir containing the specified file. +/// If cgroups v2 is not enabled - returns an empty optional. +std::optional getCgroupsV2PathContainingFile([[maybe_unused]] std::string_view file_name); diff --git a/base/base/getMemoryAmount.cpp b/base/base/getMemoryAmount.cpp index 03aab1eac72..bbfbecdbffd 100644 --- a/base/base/getMemoryAmount.cpp +++ b/base/base/getMemoryAmount.cpp @@ -19,9 +19,6 @@ std::optional getCgroupsV2MemoryLimit() if (!cgroupsV2Enabled()) return {}; - if (!cgroupsV2MemoryControllerEnabled()) - return {}; - std::filesystem::path current_cgroup = cgroupV2PathOfProcess(); if (current_cgroup.empty()) return {}; diff --git a/base/poco/Net/include/Poco/Net/HTTPServerSession.h b/base/poco/Net/include/Poco/Net/HTTPServerSession.h index 3df7995509a..b0659ca405c 100644 --- a/base/poco/Net/include/Poco/Net/HTTPServerSession.h +++ b/base/poco/Net/include/Poco/Net/HTTPServerSession.h @@ -58,6 +58,10 @@ namespace Net void setKeepAliveTimeout(Poco::Timespan keepAliveTimeout); + size_t getKeepAliveTimeout() const { return _keepAliveTimeout.totalSeconds(); } + + size_t getMaxKeepAliveRequests() const { return _maxKeepAliveRequests; } + private: bool _firstRequest; Poco::Timespan _keepAliveTimeout; diff --git a/base/poco/Net/src/HTTPServerSession.cpp b/base/poco/Net/src/HTTPServerSession.cpp index f67a63a9e0e..8eec3e14872 100644 --- a/base/poco/Net/src/HTTPServerSession.cpp +++ b/base/poco/Net/src/HTTPServerSession.cpp @@ -19,11 +19,11 @@ namespace Poco { namespace Net { -HTTPServerSession::HTTPServerSession(const StreamSocket& socket, HTTPServerParams::Ptr pParams): - HTTPSession(socket, pParams->getKeepAlive()), - _firstRequest(true), - _keepAliveTimeout(pParams->getKeepAliveTimeout()), - _maxKeepAliveRequests(pParams->getMaxKeepAliveRequests()) +HTTPServerSession::HTTPServerSession(const StreamSocket & socket, HTTPServerParams::Ptr pParams) + : HTTPSession(socket, pParams->getKeepAlive()) + , _firstRequest(true) + , _keepAliveTimeout(pParams->getKeepAliveTimeout()) + , _maxKeepAliveRequests(pParams->getMaxKeepAliveRequests()) { setTimeout(pParams->getTimeout()); } @@ -52,11 +52,12 @@ bool HTTPServerSession::hasMoreRequests() } else if (_maxKeepAliveRequests != 0 && getKeepAlive()) { - if (_maxKeepAliveRequests > 0) - --_maxKeepAliveRequests; - return buffered() > 0 || socket().poll(_keepAliveTimeout, Socket::SELECT_READ); - } - else return false; + if (_maxKeepAliveRequests > 0) + --_maxKeepAliveRequests; + return buffered() > 0 || socket().poll(_keepAliveTimeout, Socket::SELECT_READ); + } + else + return false; } diff --git a/base/poco/NetSSL_OpenSSL/src/SecureSocketImpl.cpp b/base/poco/NetSSL_OpenSSL/src/SecureSocketImpl.cpp index 4873d259ae5..eaf267d8a8b 100644 --- a/base/poco/NetSSL_OpenSSL/src/SecureSocketImpl.cpp +++ b/base/poco/NetSSL_OpenSSL/src/SecureSocketImpl.cpp @@ -311,6 +311,14 @@ int SecureSocketImpl::sendBytes(const void* buffer, int length, int flags) while (mustRetry(rc, remaining_time)); if (rc <= 0) { + // At this stage we still can have last not yet received SSL message containing SSL error + // so make a read to force SSL to process possible SSL error + if (SSL_get_error(_pSSL, rc) == SSL_ERROR_SYSCALL && SocketImpl::lastError() == POCO_ECONNRESET) + { + char c = 0; + SSL_read(_pSSL, &c, 1); + } + rc = handleError(rc); if (rc == 0) throw SSLConnectionUnexpectedlyClosedException(); } diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index d69646d3694..c82038804fe 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: VERSION_REVISION has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. -SET(VERSION_REVISION 54489) +SET(VERSION_REVISION 54490) SET(VERSION_MAJOR 24) -SET(VERSION_MINOR 8) +SET(VERSION_MINOR 9) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH 3f8b27d7accd2b5ec4afe7d0dd459115323304af) -SET(VERSION_DESCRIBE v24.8.1.1-testing) -SET(VERSION_STRING 24.8.1.1) +SET(VERSION_GITHASH e02b434d2fc0c4fbee29ca675deab7474d274608) +SET(VERSION_DESCRIBE v24.9.1.1-testing) +SET(VERSION_STRING 24.9.1.1) # end of autochange diff --git a/cmake/freebsd/default_libs.cmake b/cmake/freebsd/default_libs.cmake index 6bde75f8c9a..3f5b3829877 100644 --- a/cmake/freebsd/default_libs.cmake +++ b/cmake/freebsd/default_libs.cmake @@ -9,10 +9,18 @@ endif () file(GLOB bprefix "/usr/local/llvm${COMPILER_VERSION_MAJOR}/lib/clang/${COMPILER_VERSION_MAJOR}/lib/${system_processor}-portbld-freebsd*/") message(STATUS "-Bprefix: ${bprefix}") -execute_process(COMMAND ${CMAKE_CXX_COMPILER} -Bprefix=${bprefix} --print-file-name=libclang_rt.builtins-${system_processor}.a OUTPUT_VARIABLE BUILTINS_LIBRARY OUTPUT_STRIP_TRAILING_WHITESPACE) +execute_process(COMMAND + ${CMAKE_CXX_COMPILER} -Bprefix=${bprefix} --print-file-name=libclang_rt.builtins-${system_processor}.a + OUTPUT_VARIABLE BUILTINS_LIBRARY + COMMAND_ERROR_IS_FATAL ANY + OUTPUT_STRIP_TRAILING_WHITESPACE) # --print-file-name simply prints what you passed in case of nothing was resolved, so let's try one other possible option if (BUILTINS_LIBRARY STREQUAL "libclang_rt.builtins-${system_processor}.a") - execute_process(COMMAND ${CMAKE_CXX_COMPILER} -Bprefix=${bprefix} --print-file-name=libclang_rt.builtins.a OUTPUT_VARIABLE BUILTINS_LIBRARY OUTPUT_STRIP_TRAILING_WHITESPACE) + execute_process(COMMAND + ${CMAKE_CXX_COMPILER} -Bprefix=${bprefix} --print-file-name=libclang_rt.builtins.a + OUTPUT_VARIABLE BUILTINS_LIBRARY + COMMAND_ERROR_IS_FATAL ANY + OUTPUT_STRIP_TRAILING_WHITESPACE) endif() if (BUILTINS_LIBRARY STREQUAL "libclang_rt.builtins.a") message(FATAL_ERROR "libclang_rt.builtins had not been found") diff --git a/cmake/freebsd/toolchain-x86_64.cmake b/cmake/freebsd/toolchain-x86_64.cmake index 4635880b4a6..4d814693b39 100644 --- a/cmake/freebsd/toolchain-x86_64.cmake +++ b/cmake/freebsd/toolchain-x86_64.cmake @@ -8,4 +8,7 @@ set (CMAKE_CXX_COMPILER_TARGET "x86_64-pc-freebsd11") set (CMAKE_ASM_COMPILER_TARGET "x86_64-pc-freebsd11") set (CMAKE_SYSROOT "${CMAKE_CURRENT_LIST_DIR}/../../contrib/sysroot/freebsd-x86_64") +# dprintf is used in a patched version of replxx +add_compile_definitions(_WITH_DPRINTF) + set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) # disable linkage check - it doesn't work in CMake diff --git a/cmake/limit_jobs.cmake b/cmake/limit_jobs.cmake index 17d8dd42a2c..8e48fc9b9d8 100644 --- a/cmake/limit_jobs.cmake +++ b/cmake/limit_jobs.cmake @@ -42,19 +42,9 @@ endif () # But use 2 parallel jobs, since: # - this is what llvm does # - and I've verfied that lld-11 does not use all available CPU time (in peak) while linking one binary -if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" AND ENABLE_THINLTO) - if (ARCH_AARCH64) - # aarch64 builds start to often fail with OOMs (reason not yet clear), for now let's limit the concurrency - message(STATUS "ThinLTO provides its own parallel linking - limiting parallel link jobs to 1.") - set (PARALLEL_LINK_JOBS 1) - if (LINKER_NAME MATCHES "lld") - math(EXPR LTO_JOBS ${NUMBER_OF_LOGICAL_CORES}/4) - set (CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO} -Wl,--thinlto-jobs=${LTO_JOBS}") - endif() - elseif (PARALLEL_LINK_JOBS GREATER 2) - message(STATUS "ThinLTO provides its own parallel linking - limiting parallel link jobs to 2.") - set (PARALLEL_LINK_JOBS 2) - endif () +if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" AND ENABLE_THINLTO AND PARALLEL_LINK_JOBS GREATER 2) + message(STATUS "ThinLTO provides its own parallel linking - limiting parallel link jobs to 2.") + set (PARALLEL_LINK_JOBS 2) endif() message(STATUS "Building sub-tree with ${PARALLEL_COMPILE_JOBS} compile jobs and ${PARALLEL_LINK_JOBS} linker jobs (system: ${NUMBER_OF_LOGICAL_CORES} cores, ${TOTAL_PHYSICAL_MEMORY} MB RAM, 'OFF' means the native core count).") diff --git a/cmake/linux/default_libs.cmake b/cmake/linux/default_libs.cmake index 4a06243243e..51620bc9f33 100644 --- a/cmake/linux/default_libs.cmake +++ b/cmake/linux/default_libs.cmake @@ -5,7 +5,11 @@ set (DEFAULT_LIBS "-nodefaultlibs") # We need builtins from Clang's RT even without libcxx - for ubsan+int128. # See https://bugs.llvm.org/show_bug.cgi?id=16404 -execute_process (COMMAND ${CMAKE_CXX_COMPILER} --target=${CMAKE_CXX_COMPILER_TARGET} --print-libgcc-file-name --rtlib=compiler-rt OUTPUT_VARIABLE BUILTINS_LIBRARY OUTPUT_STRIP_TRAILING_WHITESPACE) +execute_process (COMMAND + ${CMAKE_CXX_COMPILER} --target=${CMAKE_CXX_COMPILER_TARGET} --print-libgcc-file-name --rtlib=compiler-rt + OUTPUT_VARIABLE BUILTINS_LIBRARY + COMMAND_ERROR_IS_FATAL ANY + OUTPUT_STRIP_TRAILING_WHITESPACE) # Apparently, in clang-19, the UBSan support library for C++ was moved out into ubsan_standalone_cxx.a, so we have to include both. if (SANITIZE STREQUAL undefined) diff --git a/cmake/sanitize.cmake b/cmake/sanitize.cmake index d8d211b9329..091e20e1387 100644 --- a/cmake/sanitize.cmake +++ b/cmake/sanitize.cmake @@ -57,8 +57,8 @@ option(WITH_COVERAGE "Instrumentation for code coverage with default implementat if (WITH_COVERAGE) message (STATUS "Enabled instrumentation for code coverage") - set(COVERAGE_FLAGS "SHELL:-fprofile-instr-generate -fcoverage-mapping") - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fprofile-instr-generate -fcoverage-mapping") + set (COVERAGE_FLAGS -fprofile-instr-generate -fcoverage-mapping) + set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fprofile-instr-generate -fcoverage-mapping") endif() option (SANITIZE_COVERAGE "Instrumentation for code coverage with custom callbacks" OFF) diff --git a/cmake/tools.cmake b/cmake/tools.cmake index 7aa5d4c51ce..5c7da54b779 100644 --- a/cmake/tools.cmake +++ b/cmake/tools.cmake @@ -5,7 +5,11 @@ if (NOT CMAKE_CXX_COMPILER_ID MATCHES "Clang") endif () # Print details to output -execute_process(COMMAND ${CMAKE_CXX_COMPILER} --version OUTPUT_VARIABLE COMPILER_SELF_IDENTIFICATION OUTPUT_STRIP_TRAILING_WHITESPACE) +execute_process(COMMAND ${CMAKE_CXX_COMPILER} --version + OUTPUT_VARIABLE COMPILER_SELF_IDENTIFICATION + COMMAND_ERROR_IS_FATAL ANY + OUTPUT_STRIP_TRAILING_WHITESPACE +) message (STATUS "Using compiler:\n${COMPILER_SELF_IDENTIFICATION}") # Require minimum compiler versions diff --git a/cmake/utils.cmake b/cmake/utils.cmake index a318408098a..a99d8e050a8 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -90,7 +90,10 @@ endfunction() # Function get_cmake_properties returns list of all propreties that cmake supports function(get_cmake_properties outvar) - execute_process(COMMAND cmake --help-property-list OUTPUT_VARIABLE cmake_properties) + execute_process(COMMAND cmake --help-property-list + OUTPUT_VARIABLE cmake_properties + COMMAND_ERROR_IS_FATAL ANY + ) # Convert command output into a CMake list string(REGEX REPLACE ";" "\\\\;" cmake_properties "${cmake_properties}") string(REGEX REPLACE "\n" ";" cmake_properties "${cmake_properties}") diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 977efda15ff..d7489bc5c0e 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -71,7 +71,6 @@ add_contrib (zlib-ng-cmake zlib-ng) add_contrib (bzip2-cmake bzip2) add_contrib (minizip-ng-cmake minizip-ng) add_contrib (snappy-cmake snappy) -add_contrib (rocksdb-cmake rocksdb) add_contrib (thrift-cmake thrift) # parquet/arrow/orc add_contrib (arrow-cmake arrow) # requires: snappy, thrift, double-conversion @@ -148,6 +147,7 @@ add_contrib (hive-metastore-cmake hive-metastore) # requires: thrift, avro, arro add_contrib (cppkafka-cmake cppkafka) add_contrib (libpqxx-cmake libpqxx) add_contrib (libpq-cmake libpq) +add_contrib (rocksdb-cmake rocksdb) # requires: jemalloc, snappy, zlib, lz4, zstd, liburing add_contrib (nuraft-cmake NuRaft) add_contrib (fast_float-cmake fast_float) add_contrib (idna-cmake idna) @@ -179,7 +179,7 @@ else() message(STATUS "Not using QPL") endif () -if (OS_LINUX AND ARCH_AMD64) +if (OS_LINUX AND ARCH_AMD64 AND NOT NO_SSE3_OR_HIGHER) option (ENABLE_QATLIB "Enable Intel® QuickAssist Technology Library (QATlib)" ${ENABLE_LIBRARIES}) elseif(ENABLE_QATLIB) message (${RECONFIGURE_MESSAGE_LEVEL} "QATLib is only supported on x86_64") @@ -205,14 +205,12 @@ add_contrib (morton-nd-cmake morton-nd) if (ARCH_S390X) add_contrib(crc32-s390x-cmake crc32-s390x) endif() -add_contrib (annoy-cmake annoy) -option(ENABLE_USEARCH "Enable USearch (Approximate Neighborhood Search, HNSW) support" ${ENABLE_LIBRARIES}) +option(ENABLE_USEARCH "Enable USearch" ${ENABLE_LIBRARIES}) if (ENABLE_USEARCH) add_contrib (FP16-cmake FP16) - add_contrib (robin-map-cmake robin-map) add_contrib (SimSIMD-cmake SimSIMD) - add_contrib (usearch-cmake usearch) # requires: FP16, robin-map, SimdSIMD + add_contrib (usearch-cmake usearch) # requires: FP16, SimdSIMD else () message(STATUS "Not using USearch") endif () diff --git a/contrib/QAT-ZSTD-Plugin-cmake/CMakeLists.txt b/contrib/QAT-ZSTD-Plugin-cmake/CMakeLists.txt index 72d21a8572b..fc18092f574 100644 --- a/contrib/QAT-ZSTD-Plugin-cmake/CMakeLists.txt +++ b/contrib/QAT-ZSTD-Plugin-cmake/CMakeLists.txt @@ -27,7 +27,7 @@ if (ENABLE_QAT_OUT_OF_TREE_BUILD) ${QAT_AL_INCLUDE_DIR} ${QAT_USDM_INCLUDE_DIR} ${ZSTD_LIBRARY_DIR}) - target_compile_definitions(_qatzstd_plugin PRIVATE -DDEBUGLEVEL=0 PUBLIC -DENABLE_ZSTD_QAT_CODEC) + target_compile_definitions(_qatzstd_plugin PRIVATE -DDEBUGLEVEL=0) add_library (ch_contrib::qatzstd_plugin ALIAS _qatzstd_plugin) else () # In-tree build message(STATUS "Intel QATZSTD in-tree build") @@ -78,7 +78,7 @@ else () # In-tree build ${QAT_USDM_INCLUDE_DIR} ${ZSTD_LIBRARY_DIR} ${LIBQAT_HEADER_DIR}) - target_compile_definitions(_qatzstd_plugin PRIVATE -DDEBUGLEVEL=0 PUBLIC -DENABLE_ZSTD_QAT_CODEC -DINTREE) + target_compile_definitions(_qatzstd_plugin PRIVATE -DDEBUGLEVEL=0 PUBLIC -DINTREE) target_include_directories(_qatzstd_plugin SYSTEM PUBLIC $ $) add_library (ch_contrib::qatzstd_plugin ALIAS _qatzstd_plugin) endif () diff --git a/contrib/SimSIMD b/contrib/SimSIMD index de2cb75b9e9..91a76d1ac51 160000 --- a/contrib/SimSIMD +++ b/contrib/SimSIMD @@ -1 +1 @@ -Subproject commit de2cb75b9e9e3389d5e1e51fd9f8ed151f3c17cf +Subproject commit 91a76d1ac519b3b9dc8957734a3dabd985f00c26 diff --git a/contrib/annoy b/contrib/annoy deleted file mode 160000 index f2ac8e7b48f..00000000000 --- a/contrib/annoy +++ /dev/null @@ -1 +0,0 @@ -Subproject commit f2ac8e7b48f9a9cf676d3b58286e5455aba8e956 diff --git a/contrib/annoy-cmake/CMakeLists.txt b/contrib/annoy-cmake/CMakeLists.txt deleted file mode 100644 index bdef7d92132..00000000000 --- a/contrib/annoy-cmake/CMakeLists.txt +++ /dev/null @@ -1,24 +0,0 @@ -option(ENABLE_ANNOY "Enable Annoy index support" ${ENABLE_LIBRARIES}) - -# Annoy index should be disabled with undefined sanitizer. Because of memory storage optimizations -# (https://github.com/ClickHouse/annoy/blob/9d8a603a4cd252448589e84c9846f94368d5a289/src/annoylib.h#L442-L463) -# UBSan fails and leads to crash. Simmilar issue is already opened in Annoy repo -# https://github.com/spotify/annoy/issues/456 -# Problem with aligment can lead to errors like -# (https://stackoverflow.com/questions/46790550/c-undefined-behavior-strict-aliasing-rule-or-incorrect-alignment) -# or will lead to crash on arm https://developer.arm.com/documentation/ka003038/latest -# This issues should be resolved before annoy became non-experimental (--> setting "allow_experimental_annoy_index") -if ((NOT ENABLE_ANNOY) OR (SANITIZE STREQUAL "undefined") OR (ARCH_AARCH64)) - message (STATUS "Not using annoy") - return() -endif() - -set(ANNOY_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/annoy") -set(ANNOY_SOURCE_DIR "${ANNOY_PROJECT_DIR}/src") - -add_library(_annoy INTERFACE) -target_include_directories(_annoy SYSTEM INTERFACE ${ANNOY_SOURCE_DIR}) - -add_library(ch_contrib::annoy ALIAS _annoy) -target_compile_definitions(_annoy INTERFACE ENABLE_ANNOY) -target_compile_definitions(_annoy INTERFACE ANNOYLIB_MULTITHREADED_BUILD) diff --git a/contrib/aws b/contrib/aws index 1c2946bfcb7..d5450d76abd 160000 --- a/contrib/aws +++ b/contrib/aws @@ -1 +1 @@ -Subproject commit 1c2946bfcb7f1e3ae0a858de0b59d4f1a7b4ccaf +Subproject commit d5450d76abda556ce145ddabe7e0cc6a7644ec59 diff --git a/contrib/aws-crt-cpp b/contrib/aws-crt-cpp index f532d6abc0d..e5aa45cacfd 160000 --- a/contrib/aws-crt-cpp +++ b/contrib/aws-crt-cpp @@ -1 +1 @@ -Subproject commit f532d6abc0d2b0d8b5d6fe9e7c51eaedbe4afbd0 +Subproject commit e5aa45cacfdcda7719ead38760e7c61076f5745f diff --git a/contrib/cctz-cmake/CMakeLists.txt b/contrib/cctz-cmake/CMakeLists.txt index 7161f743de1..fadf948b053 100644 --- a/contrib/cctz-cmake/CMakeLists.txt +++ b/contrib/cctz-cmake/CMakeLists.txt @@ -37,7 +37,9 @@ message(STATUS "Packaging with tzdata version: ${TZDATA_VERSION}") execute_process(COMMAND bash -c "cd ${TZDIR} && find * -type f -and ! -name '*.tab' -and ! -name 'localtime' | LC_ALL=C sort | paste -sd ';' -" OUTPUT_STRIP_TRAILING_WHITESPACE - OUTPUT_VARIABLE TIMEZONES) + OUTPUT_VARIABLE TIMEZONES + COMMAND_ERROR_IS_FATAL ANY +) file(APPEND ${TIMEZONES_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n") file(APPEND ${TIMEZONES_FILE} "#include \n") diff --git a/contrib/google-protobuf-cmake/CMakeLists.txt b/contrib/google-protobuf-cmake/CMakeLists.txt index e44f737cfc3..f1a744f851f 100644 --- a/contrib/google-protobuf-cmake/CMakeLists.txt +++ b/contrib/google-protobuf-cmake/CMakeLists.txt @@ -359,7 +359,9 @@ else () execute_process( COMMAND mkdir -p ${PROTOC_BUILD_DIR} - COMMAND_ECHO STDOUT) + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) execute_process( COMMAND ${CMAKE_COMMAND} @@ -375,11 +377,15 @@ else () "-DABSL_ENABLE_INSTALL=0" "${protobuf_source_dir}" WORKING_DIRECTORY "${PROTOC_BUILD_DIR}" - COMMAND_ECHO STDOUT) + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) execute_process( COMMAND ${CMAKE_COMMAND} --build "${PROTOC_BUILD_DIR}" - COMMAND_ECHO STDOUT) + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) endif () add_executable(protoc IMPORTED GLOBAL) diff --git a/contrib/grpc-cmake/CMakeLists.txt b/contrib/grpc-cmake/CMakeLists.txt index 1c0bf41ff78..975774d1990 100644 --- a/contrib/grpc-cmake/CMakeLists.txt +++ b/contrib/grpc-cmake/CMakeLists.txt @@ -51,8 +51,9 @@ if (NOT CMAKE_HOST_SYSTEM_NAME STREQUAL CMAKE_SYSTEM_NAME set(OPENSSL_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/openssl-cmake") execute_process( - COMMAND mkdir -p ${OPENSSL_BUILD_DIR} - COMMAND_ECHO STDOUT + COMMAND mkdir -p ${OPENSSL_BUILD_DIR} + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY ) if (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "amd64|x86_64") @@ -89,15 +90,21 @@ if (NOT CMAKE_HOST_SYSTEM_NAME STREQUAL CMAKE_SYSTEM_NAME "-DClickHouse_SOURCE_DIR=${ClickHouse_SOURCE_DIR}" "${OPENSSL_SOURCE_DIR}" WORKING_DIRECTORY "${OPENSSL_BUILD_DIR}" - COMMAND_ECHO STDOUT) + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) execute_process( COMMAND ${CMAKE_COMMAND} --build "${OPENSSL_BUILD_DIR}" - COMMAND_ECHO STDOUT) + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) execute_process( COMMAND ${CMAKE_COMMAND} --install "${OPENSSL_BUILD_DIR}" - COMMAND_ECHO STDOUT) + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) # It's not important on which file we depend, we just want to specify right order add_library(openssl_for_grpc STATIC IMPORTED GLOBAL) @@ -108,8 +115,9 @@ if (NOT CMAKE_HOST_SYSTEM_NAME STREQUAL CMAKE_SYSTEM_NAME set (GRPC_CPP_PLUGIN_BUILD_DIR "${_gRPC_BINARY_DIR}/build") execute_process( - COMMAND mkdir -p ${GRPC_CPP_PLUGIN_BUILD_DIR} - COMMAND_ECHO STDOUT + COMMAND mkdir -p ${GRPC_CPP_PLUGIN_BUILD_DIR} + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY ) set(abseil_source_dir "${ClickHouse_SOURCE_DIR}/contrib/abseil-cpp") @@ -140,11 +148,15 @@ if (NOT CMAKE_HOST_SYSTEM_NAME STREQUAL CMAKE_SYSTEM_NAME "-DgRPC_SSL_PROVIDER=package" "${_gRPC_SOURCE_DIR}" WORKING_DIRECTORY "${GRPC_CPP_PLUGIN_BUILD_DIR}" - COMMAND_ECHO STDOUT) + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) execute_process( COMMAND ${CMAKE_COMMAND} --build "${GRPC_CPP_PLUGIN_BUILD_DIR}" - COMMAND_ECHO STDOUT) + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) add_executable(grpc_cpp_plugin IMPORTED GLOBAL) set_target_properties (grpc_cpp_plugin PROPERTIES IMPORTED_LOCATION "${GRPC_CPP_PLUGIN_BUILD_DIR}/grpc_cpp_plugin") diff --git a/contrib/icu b/contrib/icu index 7750081bda4..4216173eeeb 160000 --- a/contrib/icu +++ b/contrib/icu @@ -1 +1 @@ -Subproject commit 7750081bda4b3bc1768ae03849ec70f67ea10625 +Subproject commit 4216173eeeb39c1d4caaa54a68860e800412d273 diff --git a/contrib/libfiu-cmake/CMakeLists.txt b/contrib/libfiu-cmake/CMakeLists.txt index e805491edbb..eab55087c98 100644 --- a/contrib/libfiu-cmake/CMakeLists.txt +++ b/contrib/libfiu-cmake/CMakeLists.txt @@ -1,20 +1,21 @@ -if (NOT ENABLE_FIU) - message (STATUS "Not using fiu") +if (NOT ENABLE_LIBFIU) + message (STATUS "Not using libfiu") return () endif () -set(FIU_DIR "${ClickHouse_SOURCE_DIR}/contrib/libfiu/") +set(LIBFIU_DIR "${ClickHouse_SOURCE_DIR}/contrib/libfiu/") -set(FIU_SOURCES - ${FIU_DIR}/libfiu/fiu.c - ${FIU_DIR}/libfiu/fiu-rc.c - ${FIU_DIR}/libfiu/backtrace.c - ${FIU_DIR}/libfiu/wtable.c +set(LIBFIU_SOURCES + ${LIBFIU_DIR}/libfiu/fiu.c + ${LIBFIU_DIR}/libfiu/fiu-rc.c + ${LIBFIU_DIR}/libfiu/backtrace.c + ${LIBFIU_DIR}/libfiu/wtable.c ) -set(FIU_HEADERS "${FIU_DIR}/libfiu") +set(LIBFIU_HEADERS "${LIBFIU_DIR}/libfiu") -add_library(_fiu ${FIU_SOURCES}) -target_compile_definitions(_fiu PUBLIC DUMMY_BACKTRACE) -target_include_directories(_fiu PUBLIC ${FIU_HEADERS}) -add_library(ch_contrib::fiu ALIAS _fiu) +add_library(_libfiu ${LIBFIU_SOURCES}) +target_compile_definitions(_libfiu PUBLIC DUMMY_BACKTRACE) +target_compile_definitions(_libfiu PUBLIC FIU_ENABLE) +target_include_directories(_libfiu PUBLIC ${LIBFIU_HEADERS}) +add_library(ch_contrib::libfiu ALIAS _libfiu) diff --git a/contrib/libprotobuf-mutator b/contrib/libprotobuf-mutator index 1f95f808306..b922c8ab900 160000 --- a/contrib/libprotobuf-mutator +++ b/contrib/libprotobuf-mutator @@ -1 +1 @@ -Subproject commit 1f95f8083066f5b38fd2db172e7e7f9aa7c49d2d +Subproject commit b922c8ab9004ef9944982e4f165e2747b13223fa diff --git a/contrib/librdkafka b/contrib/librdkafka index 2d2aab6f5b7..39d4ed49ccf 160000 --- a/contrib/librdkafka +++ b/contrib/librdkafka @@ -1 +1 @@ -Subproject commit 2d2aab6f5b79db1cfca15d7bf0dee75d00d82082 +Subproject commit 39d4ed49ccf3406e2bf825d5d7b0903b5a290782 diff --git a/contrib/libunwind b/contrib/libunwind index a89d904befe..601db0b0e03 160000 --- a/contrib/libunwind +++ b/contrib/libunwind @@ -1 +1 @@ -Subproject commit a89d904befea07814628c6ce0b44083c4e149c62 +Subproject commit 601db0b0e03018c01710470a37703b618f9cf08b diff --git a/contrib/qpl-cmake/CMakeLists.txt b/contrib/qpl-cmake/CMakeLists.txt index e62612cff5a..89332ae0f7a 100644 --- a/contrib/qpl-cmake/CMakeLists.txt +++ b/contrib/qpl-cmake/CMakeLists.txt @@ -728,10 +728,6 @@ add_library(_qpl STATIC ${LIB_DEPS}) target_include_directories(_qpl PUBLIC $ $) - -target_compile_definitions(_qpl - PUBLIC -DENABLE_QPL_COMPRESSION) - target_link_libraries(_qpl PRIVATE ch_contrib::accel-config) diff --git a/contrib/replxx b/contrib/replxx index 5d04501f93a..711c18e7f4d 160000 --- a/contrib/replxx +++ b/contrib/replxx @@ -1 +1 @@ -Subproject commit 5d04501f93a4fb7f0bb8b73b8f614bc986f9e25b +Subproject commit 711c18e7f4d951255aa8b0851e5a55d5a5fb0ddb diff --git a/contrib/robin-map b/contrib/robin-map deleted file mode 160000 index 851a59e0e30..00000000000 --- a/contrib/robin-map +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 851a59e0e3063ee0e23089062090a73fd3de482d diff --git a/contrib/robin-map-cmake/CMakeLists.txt b/contrib/robin-map-cmake/CMakeLists.txt deleted file mode 100644 index f82ad705dcc..00000000000 --- a/contrib/robin-map-cmake/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -# See contrib/usearch-cmake/CMakeLists.txt diff --git a/contrib/rocksdb b/contrib/rocksdb index 49ce8a1064d..5f003e4a22d 160000 --- a/contrib/rocksdb +++ b/contrib/rocksdb @@ -1 +1 @@ -Subproject commit 49ce8a1064dd1ad89117899839bf136365e49e79 +Subproject commit 5f003e4a22d2e48e37c98d9620241237cd30dd24 diff --git a/contrib/rocksdb-cmake/CMakeLists.txt b/contrib/rocksdb-cmake/CMakeLists.txt index 57c056532c6..44aa7494607 100644 --- a/contrib/rocksdb-cmake/CMakeLists.txt +++ b/contrib/rocksdb-cmake/CMakeLists.txt @@ -5,36 +5,38 @@ if (NOT ENABLE_ROCKSDB OR NO_SSE3_OR_HIGHER) # assumes SSE4.2 and PCLMUL return() endif() -# not in original build system, otherwise xxHash.cc fails to compile with ClickHouse C++23 default -set (CMAKE_CXX_STANDARD 20) - -# Always disable jemalloc for rocksdb by default because it introduces non-standard jemalloc APIs -option(WITH_JEMALLOC "build with JeMalloc" OFF) - -option(WITH_LIBURING "build with liburing" OFF) # TODO could try to enable this conditionally, depending on ClickHouse's ENABLE_LIBURING - # ClickHouse cannot be compiled without snappy, lz4, zlib, zstd option(WITH_SNAPPY "build with SNAPPY" ON) option(WITH_LZ4 "build with lz4" ON) option(WITH_ZLIB "build with zlib" ON) option(WITH_ZSTD "build with zstd" ON) -if(WITH_SNAPPY) +if (ENABLE_JEMALLOC AND OS_LINUX) # gives compile errors with jemalloc enabled for rocksdb on non-Linux + add_definitions(-DROCKSDB_JEMALLOC -DJEMALLOC_NO_DEMANGLE) + list (APPEND THIRDPARTY_LIBS ch_contrib::jemalloc) +endif () + +if (ENABLE_LIBURING) + add_definitions(-DROCKSDB_IOURING_PRESENT) + list (APPEND THIRDPARTY_LIBS ch_contrib::liburing) +endif () + +if (WITH_SNAPPY) add_definitions(-DSNAPPY) list(APPEND THIRDPARTY_LIBS ch_contrib::snappy) endif() -if(WITH_ZLIB) +if (WITH_ZLIB) add_definitions(-DZLIB) list(APPEND THIRDPARTY_LIBS ch_contrib::zlib) endif() -if(WITH_LZ4) +if (WITH_LZ4) add_definitions(-DLZ4) list(APPEND THIRDPARTY_LIBS ch_contrib::lz4) endif() -if(WITH_ZSTD) +if (WITH_ZSTD) add_definitions(-DZSTD) list(APPEND THIRDPARTY_LIBS ch_contrib::zstd) endif() @@ -88,6 +90,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/cache/sharded_cache.cc ${ROCKSDB_SOURCE_DIR}/cache/tiered_secondary_cache.cc ${ROCKSDB_SOURCE_DIR}/db/arena_wrapped_db_iter.cc + ${ROCKSDB_SOURCE_DIR}/db/attribute_group_iterator_impl.cc ${ROCKSDB_SOURCE_DIR}/db/blob/blob_contents.cc ${ROCKSDB_SOURCE_DIR}/db/blob/blob_fetcher.cc ${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_addition.cc @@ -104,6 +107,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/db/blob/prefetch_buffer_collection.cc ${ROCKSDB_SOURCE_DIR}/db/builder.cc ${ROCKSDB_SOURCE_DIR}/db/c.cc + ${ROCKSDB_SOURCE_DIR}/db/coalescing_iterator.cc ${ROCKSDB_SOURCE_DIR}/db/column_family.cc ${ROCKSDB_SOURCE_DIR}/db/compaction/compaction.cc ${ROCKSDB_SOURCE_DIR}/db/compaction/compaction_iterator.cc @@ -124,6 +128,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_write.cc ${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_compaction_flush.cc ${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_files.cc + ${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_follower.cc ${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_open.cc ${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_debug.cc ${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_experimental.cc @@ -181,6 +186,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/env/env_encryption.cc ${ROCKSDB_SOURCE_DIR}/env/file_system.cc ${ROCKSDB_SOURCE_DIR}/env/file_system_tracer.cc + ${ROCKSDB_SOURCE_DIR}/env/fs_on_demand.cc ${ROCKSDB_SOURCE_DIR}/env/fs_remap.cc ${ROCKSDB_SOURCE_DIR}/env/mock_env.cc ${ROCKSDB_SOURCE_DIR}/env/unique_id_gen.cc @@ -368,6 +374,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/utilities/persistent_cache/volatile_tier_impl.cc ${ROCKSDB_SOURCE_DIR}/utilities/simulator_cache/cache_simulator.cc ${ROCKSDB_SOURCE_DIR}/utilities/simulator_cache/sim_cache.cc + ${ROCKSDB_SOURCE_DIR}/utilities/table_properties_collectors/compact_for_tiering_collector.cc ${ROCKSDB_SOURCE_DIR}/utilities/table_properties_collectors/compact_on_deletion_collector.cc ${ROCKSDB_SOURCE_DIR}/utilities/trace/file_trace_reader_writer.cc ${ROCKSDB_SOURCE_DIR}/utilities/trace/replayer_impl.cc @@ -388,6 +395,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/utilities/transactions/write_prepared_txn_db.cc ${ROCKSDB_SOURCE_DIR}/utilities/transactions/write_unprepared_txn.cc ${ROCKSDB_SOURCE_DIR}/utilities/transactions/write_unprepared_txn_db.cc + ${ROCKSDB_SOURCE_DIR}/utilities/types_util.cc ${ROCKSDB_SOURCE_DIR}/utilities/ttl/db_ttl_impl.cc ${ROCKSDB_SOURCE_DIR}/utilities/wal_filter.cc ${ROCKSDB_SOURCE_DIR}/utilities/write_batch_with_index/write_batch_with_index.cc @@ -418,14 +426,18 @@ if(HAS_ARMV8_CRC) endif(HAS_ARMV8_CRC) list(APPEND SOURCES - "${ROCKSDB_SOURCE_DIR}/port/port_posix.cc" - "${ROCKSDB_SOURCE_DIR}/env/env_posix.cc" - "${ROCKSDB_SOURCE_DIR}/env/fs_posix.cc" - "${ROCKSDB_SOURCE_DIR}/env/io_posix.cc") + ${ROCKSDB_SOURCE_DIR}/port/port_posix.cc + ${ROCKSDB_SOURCE_DIR}/env/env_posix.cc + ${ROCKSDB_SOURCE_DIR}/env/fs_posix.cc + ${ROCKSDB_SOURCE_DIR}/env/io_posix.cc) add_library(_rocksdb ${SOURCES}) add_library(ch_contrib::rocksdb ALIAS _rocksdb) target_link_libraries(_rocksdb PRIVATE ${THIRDPARTY_LIBS} ${SYSTEM_LIBS}) +# Not in the native build system but useful anyways: +# Make all functions in xxHash.h inline. Beneficial for performance: https://github.com/Cyan4973/xxHash/tree/v0.8.2#build-modifiers +target_compile_definitions (_rocksdb PRIVATE XXH_INLINE_ALL) + # SYSTEM is required to overcome some issues target_include_directories(_rocksdb SYSTEM BEFORE INTERFACE "${ROCKSDB_SOURCE_DIR}/include") diff --git a/contrib/usearch b/contrib/usearch index 30810452bec..e21a5778a0d 160000 --- a/contrib/usearch +++ b/contrib/usearch @@ -1 +1 @@ -Subproject commit 30810452bec5d3d3aa0931bb5d761e2f09aa6356 +Subproject commit e21a5778a0d4469ddaf38c94b7be0196bb701ee4 diff --git a/contrib/usearch-cmake/CMakeLists.txt b/contrib/usearch-cmake/CMakeLists.txt index 29fbe57106c..df131e0c528 100644 --- a/contrib/usearch-cmake/CMakeLists.txt +++ b/contrib/usearch-cmake/CMakeLists.txt @@ -1,17 +1,22 @@ -set(USEARCH_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/usearch") -set(USEARCH_SOURCE_DIR "${USEARCH_PROJECT_DIR}/include") - set(FP16_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/FP16") -set(ROBIN_MAP_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/robin-map") -set(SIMSIMD_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/SimSIMD-map") +set(SIMSIMD_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/SimSIMD") +set(USEARCH_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/usearch") add_library(_usearch INTERFACE) target_include_directories(_usearch SYSTEM INTERFACE ${FP16_PROJECT_DIR}/include - ${ROBIN_MAP_PROJECT_DIR}/include ${SIMSIMD_PROJECT_DIR}/include - ${USEARCH_SOURCE_DIR}) + ${USEARCH_PROJECT_DIR}/include) + +target_compile_definitions(_usearch INTERFACE USEARCH_USE_FP16LIB) + +# target_compile_definitions(_usearch INTERFACE USEARCH_USE_SIMSIMD) +# ^^ simsimd is not enabled at the moment. Reasons: +# - Vectorization is important for raw scans but not so much for HNSW. We use usearch only for HNSW. +# - Simsimd does compile-time dispatch (choice of SIMD kernels determined by capabilities of the build machine) or dynamic dispatch (SIMD +# kernels chosen at runtime based on cpuid instruction). Since current builds are limited to SSE 4.2 (x86) and NEON (ARM), the speedup of +# the former would be moderate compared to AVX-512 / SVE. The latter is at the moment too fragile with respect to portability across x86 +# and ARM machines ... certain conbinations of quantizations / distance functions / SIMD instructions are not implemented at the moment. add_library(ch_contrib::usearch ALIAS _usearch) -target_compile_definitions(_usearch INTERFACE ENABLE_USEARCH) diff --git a/docker/images.json b/docker/images.json index 716b76ee217..055394b69e6 100644 --- a/docker/images.json +++ b/docker/images.json @@ -47,8 +47,7 @@ "docker/test/stateful": { "name": "clickhouse/stateful-test", "dependent": [ - "docker/test/stress", - "docker/test/upgrade" + "docker/test/stress" ] }, "docker/test/unit": { @@ -59,10 +58,6 @@ "name": "clickhouse/stress-test", "dependent": [] }, - "docker/test/upgrade": { - "name": "clickhouse/upgrade-check", - "dependent": [] - }, "docker/test/integration/runner": { "name": "clickhouse/integration-tests-runner", "dependent": [] diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index 94603763572..6ff7ea43374 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.7.2.13" +ARG VERSION="24.8.2.3" ARG PACKAGES="clickhouse-keeper" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/packager/binary-builder/build.sh b/docker/packager/binary-builder/build.sh index bd5f2fe8466..ba9311c6880 100755 --- a/docker/packager/binary-builder/build.sh +++ b/docker/packager/binary-builder/build.sh @@ -108,7 +108,8 @@ if [ -n "$MAKE_DEB" ]; then bash -x /build/packages/build fi -mv ./programs/clickhouse* /output || mv ./programs/*_fuzzer /output +mv ./programs/clickhouse* /output ||: +mv ./programs/*_fuzzer /output ||: [ -x ./programs/self-extracting/clickhouse ] && mv ./programs/self-extracting/clickhouse /output [ -x ./programs/self-extracting/clickhouse-stripped ] && mv ./programs/self-extracting/clickhouse-stripped /output [ -x ./programs/self-extracting/clickhouse-keeper ] && mv ./programs/self-extracting/clickhouse-keeper /output diff --git a/docker/packager/cctools/Dockerfile b/docker/packager/cctools/Dockerfile index d986c6a3c86..570a42d42d5 100644 --- a/docker/packager/cctools/Dockerfile +++ b/docker/packager/cctools/Dockerfile @@ -1,3 +1,5 @@ +# docker build -t clickhouse/cctools . + # This is a hack to significantly reduce the build time of the clickhouse/binary-builder # It's based on the assumption that we don't care of the cctools version so much # It event does not depend on the clickhouse/fasttest in the `docker/images.json` @@ -30,5 +32,29 @@ RUN git clone https://github.com/tpoechtrager/cctools-port.git \ && cd ../.. \ && rm -rf cctools-port +# +# GDB +# +# ld from binutils is 2.38, which has the following error: +# +# DWARF error: invalid or unhandled FORM value: 0x23 +# +ENV LD=ld.lld-${LLVM_VERSION} +ARG GDB_VERSION=15.1 +RUN apt-get update \ + && apt-get install --yes \ + libgmp-dev \ + libmpfr-dev \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* +RUN wget https://sourceware.org/pub/gdb/releases/gdb-$GDB_VERSION.tar.gz \ + && tar -xvf gdb-$GDB_VERSION.tar.gz \ + && cd gdb-$GDB_VERSION \ + && ./configure --prefix=/opt/gdb \ + && make -j $(nproc) \ + && make install \ + && rm -fr gdb-$GDB_VERSION gdb-$GDB_VERSION.tar.gz + FROM scratch COPY --from=builder /cctools /cctools +COPY --from=builder /opt/gdb /opt/gdb diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index f40118c7b06..c87885d3b49 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.7.2.13" +ARG VERSION="24.8.2.3" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 032aa862e4a..6ccf74823e2 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -28,7 +28,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="24.7.2.13" +ARG VERSION="24.8.2.3" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" #docker-official-library:off diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index 4cac2ee6135..ca93b24f66e 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -83,7 +83,7 @@ RUN arch=${TARGETARCH:-amd64} \ # Give suid to gdb to grant it attach permissions # chmod 777 to make the container user independent -RUN chmod u+s /usr/bin/gdb \ +RUN chmod u+s /opt/gdb/bin/gdb \ && mkdir -p /var/lib/clickhouse \ && chmod 777 /var/lib/clickhouse @@ -93,6 +93,3 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ENV COMMIT_SHA='' ENV PULL_REQUEST_NUMBER='' ENV COPY_CLICKHOUSE_BINARY_TO_OUTPUT=0 - -COPY run.sh / -CMD ["/bin/bash", "/run.sh"] diff --git a/docker/test/integration/base/Dockerfile b/docker/test/integration/base/Dockerfile index 469251f648c..dc4d470a262 100644 --- a/docker/test/integration/base/Dockerfile +++ b/docker/test/integration/base/Dockerfile @@ -11,7 +11,6 @@ RUN apt-get update \ curl \ default-jre \ g++ \ - gdb \ iproute2 \ krb5-user \ libicu-dev \ @@ -73,3 +72,6 @@ maxClientCnxns=80' > /opt/zookeeper/conf/zoo.cfg && \ ENV TZ=Etc/UTC RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone + +COPY --from=clickhouse/cctools:0d6b90a7a490 /opt/gdb /opt/gdb +ENV PATH="/opt/gdb/bin:${PATH}" diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index d250b746e7d..d62009f1be3 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -30,7 +30,6 @@ RUN apt-get update \ luajit \ libssl-dev \ libcurl4-openssl-dev \ - gdb \ default-jdk \ software-properties-common \ libkrb5-dev \ @@ -87,6 +86,8 @@ COPY modprobe.sh /usr/local/bin/modprobe COPY dockerd-entrypoint.sh /usr/local/bin/ COPY misc/ /misc/ +COPY --from=clickhouse/cctools:0d6b90a7a490 /opt/gdb /opt/gdb +ENV PATH="/opt/gdb/bin:${PATH}" # Same options as in test/base/Dockerfile # (in case you need to override them in tests) diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile index c68a39f6f70..f7139275282 100644 --- a/docker/test/performance-comparison/Dockerfile +++ b/docker/test/performance-comparison/Dockerfile @@ -9,7 +9,6 @@ RUN apt-get update \ curl \ dmidecode \ g++ \ - gdb \ git \ gnuplot \ imagemagick \ @@ -42,6 +41,9 @@ RUN pip3 --no-cache-dir install -r requirements.txt COPY run.sh / +COPY --from=clickhouse/cctools:0d6b90a7a490 /opt/gdb /opt/gdb +ENV PATH="/opt/gdb/bin:${PATH}" + CMD ["bash", "/run.sh"] # docker run --network=host --volume :/workspace --volume=:/output -e PR_TO_TEST=<> -e SHA_TO_TEST=<> clickhouse/performance-comparison diff --git a/docker/test/sqllogic/Dockerfile b/docker/test/sqllogic/Dockerfile index 1425e12cd84..0d21a2da44e 100644 --- a/docker/test/sqllogic/Dockerfile +++ b/docker/test/sqllogic/Dockerfile @@ -35,12 +35,8 @@ RUN mkdir -p /tmp/clickhouse-odbc-tmp \ ENV TZ=Europe/Amsterdam -ENV MAX_RUN_TIME=9000 RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ARG sqllogic_test_repo="https://github.com/gregrahn/sqllogictest.git" RUN git clone --recursive ${sqllogic_test_repo} - -COPY run.sh / -CMD ["/bin/bash", "/run.sh"] diff --git a/docker/test/sqltest/Dockerfile b/docker/test/sqltest/Dockerfile index 71d915b0c7a..b805bb03c2b 100644 --- a/docker/test/sqltest/Dockerfile +++ b/docker/test/sqltest/Dockerfile @@ -22,7 +22,6 @@ ARG sqltest_repo="https://github.com/elliotchance/sqltest/" RUN git clone ${sqltest_repo} ENV TZ=UTC -ENV MAX_RUN_TIME=900 RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone COPY run.sh / diff --git a/docker/test/stateful/Dockerfile b/docker/test/stateful/Dockerfile index 0daf88cad7e..9aa936cb069 100644 --- a/docker/test/stateful/Dockerfile +++ b/docker/test/stateful/Dockerfile @@ -10,7 +10,3 @@ RUN apt-get update -y \ npm \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* - -COPY create.sql / -COPY run.sh / -CMD ["/bin/bash", "/run.sh"] diff --git a/docker/test/stateful/setup_minio.sh b/docker/test/stateful/setup_minio.sh deleted file mode 120000 index 0d539f72cb3..00000000000 --- a/docker/test/stateful/setup_minio.sh +++ /dev/null @@ -1 +0,0 @@ -../stateless/setup_minio.sh \ No newline at end of file diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index a0e5513a3a2..69f81b35a95 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -65,12 +65,11 @@ ENV TZ=Europe/Amsterdam RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ENV NUM_TRIES=1 -ENV MAX_RUN_TIME=0 # Unrelated to vars in setup_minio.sh, but should be the same there # to have the same binaries for local running scenario -ARG MINIO_SERVER_VERSION=2022-01-03T18-22-58Z -ARG MINIO_CLIENT_VERSION=2022-01-05T23-52-51Z +ARG MINIO_SERVER_VERSION=2024-08-03T04-33-23Z +ARG MINIO_CLIENT_VERSION=2024-07-31T15-58-33Z ARG TARGETARCH # Download Minio-related binaries @@ -86,18 +85,6 @@ RUN curl -L --no-verbose -O 'https://archive.apache.org/dist/hadoop/common/hadoo ENV MINIO_ROOT_USER="clickhouse" ENV MINIO_ROOT_PASSWORD="clickhouse" ENV EXPORT_S3_STORAGE_POLICIES=1 -ENV CLICKHOUSE_GRPC_CLIENT="/usr/share/clickhouse-utils/grpc-client/clickhouse-grpc-client.py" RUN npm install -g azurite@3.30.0 \ && npm install -g tslib && npm install -g node - -COPY run.sh / -COPY setup_minio.sh / -COPY setup_hdfs_minicluster.sh / -COPY attach_gdb.lib / -COPY utils.lib / - -# We store stress_tests.lib in stateless image to avoid duplication of this file in stress and upgrade tests -COPY stress_tests.lib / - -CMD ["/bin/bash", "/run.sh"] diff --git a/docker/test/stress/Dockerfile b/docker/test/stress/Dockerfile index 0f81a1cd07f..ecb98a4e3ed 100644 --- a/docker/test/stress/Dockerfile +++ b/docker/test/stress/Dockerfile @@ -22,8 +22,5 @@ RUN apt-get update -y \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* -COPY run.sh / - ENV EXPORT_S3_STORAGE_POLICIES=1 -CMD ["/bin/bash", "/run.sh"] diff --git a/docker/test/upgrade/Dockerfile b/docker/test/upgrade/Dockerfile deleted file mode 100644 index 78d912fd031..00000000000 --- a/docker/test/upgrade/Dockerfile +++ /dev/null @@ -1,29 +0,0 @@ -# rebuild in #33610 -# docker build -t clickhouse/upgrade-check . -ARG FROM_TAG=latest -FROM clickhouse/stateful-test:$FROM_TAG - -RUN apt-get update -y \ - && env DEBIAN_FRONTEND=noninteractive \ - apt-get install --yes --no-install-recommends \ - bash \ - tzdata \ - parallel \ - expect \ - python3 \ - python3-lxml \ - python3-termcolor \ - python3-requests \ - curl \ - sudo \ - openssl \ - netcat-openbsd \ - brotli \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* - -COPY run.sh / - -ENV EXPORT_S3_STORAGE_POLICIES=1 - -CMD ["/bin/bash", "/run.sh"] diff --git a/docker/test/util/Dockerfile b/docker/test/util/Dockerfile index dc928ba7195..6b9fb94a4c6 100644 --- a/docker/test/util/Dockerfile +++ b/docker/test/util/Dockerfile @@ -44,7 +44,6 @@ RUN apt-get update \ bash \ bsdmainutils \ build-essential \ - gdb \ git \ gperf \ moreutils \ @@ -57,4 +56,5 @@ RUN apt-get update \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* -COPY process_functional_tests_result.py / +COPY --from=clickhouse/cctools:0d6b90a7a490 /opt/gdb /opt/gdb +ENV PATH="/opt/gdb/bin:${PATH}" diff --git a/docs/changelogs/v24.3.7.30-lts.md b/docs/changelogs/v24.3.7.30-lts.md new file mode 100644 index 00000000000..f945a54840f --- /dev/null +++ b/docs/changelogs/v24.3.7.30-lts.md @@ -0,0 +1,29 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.3.7.30-lts (c8a28cf4331) FIXME as compared to v24.3.6.48-lts (b2d33c3c45d) + +#### Improvement +* Backported in [#68103](https://github.com/ClickHouse/ClickHouse/issues/68103): Distinguish booleans and integers while parsing values for custom settings: ``` SET custom_a = true; SET custom_b = 1; ```. [#62206](https://github.com/ClickHouse/ClickHouse/pull/62206) ([Vitaly Baranov](https://github.com/vitlibar)). + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Backported in [#67931](https://github.com/ClickHouse/ClickHouse/issues/67931): Fixing the `Not-ready Set` error after the `PREWHERE` optimization for StorageMerge. [#65057](https://github.com/ClickHouse/ClickHouse/pull/65057) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#68062](https://github.com/ClickHouse/ClickHouse/issues/68062): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)). +* Backported in [#67812](https://github.com/ClickHouse/ClickHouse/issues/67812): Only relevant to the experimental Variant data type. Fix crash with Variant + AggregateFunction type. [#67122](https://github.com/ClickHouse/ClickHouse/pull/67122) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#67848](https://github.com/ClickHouse/ClickHouse/issues/67848): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#68271](https://github.com/ClickHouse/ClickHouse/issues/68271): Fix inserting into stream like engines (Kafka, RabbitMQ, NATS) through HTTP interface. [#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#67806](https://github.com/ClickHouse/ClickHouse/issues/67806): Fix reloading SQL UDFs with UNION. Previously, restarting the server could make UDF invalid. [#67665](https://github.com/ClickHouse/ClickHouse/pull/67665) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#67834](https://github.com/ClickHouse/ClickHouse/issues/67834): Fix potential stack overflow in `JSONMergePatch` function. Renamed this function from `jsonMergePatch` to `JSONMergePatch` because the previous name was wrong. The previous name is still kept for compatibility. Improved diagnostic of errors in the function. This closes [#67304](https://github.com/ClickHouse/ClickHouse/issues/67304). [#67756](https://github.com/ClickHouse/ClickHouse/pull/67756) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#68206](https://github.com/ClickHouse/ClickHouse/issues/68206): Fix wrong `count()` result when there is non-deterministic function in predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#68089](https://github.com/ClickHouse/ClickHouse/issues/68089): Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#68120](https://github.com/ClickHouse/ClickHouse/issues/68120): Fixed skipping of untouched parts in mutations with new analyzer. Previously with enabled analyzer data in part could be rewritten by mutation even if mutation doesn't affect this part according to predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Update version after release. [#67676](https://github.com/ClickHouse/ClickHouse/pull/67676) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Backported in [#68074](https://github.com/ClickHouse/ClickHouse/issues/68074): Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. [#67953](https://github.com/ClickHouse/ClickHouse/pull/67953) ([pufit](https://github.com/pufit)). + diff --git a/docs/changelogs/v24.3.8.13-lts.md b/docs/changelogs/v24.3.8.13-lts.md new file mode 100644 index 00000000000..6fbceacd624 --- /dev/null +++ b/docs/changelogs/v24.3.8.13-lts.md @@ -0,0 +1,16 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.3.8.13-lts (84bbfc70f5d) FIXME as compared to v24.3.7.30-lts (c8a28cf4331) + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Backported in [#68562](https://github.com/ClickHouse/ClickHouse/issues/68562): Fix indexHint function case found by fuzzer. [#66286](https://github.com/ClickHouse/ClickHouse/pull/66286) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#68114](https://github.com/ClickHouse/ClickHouse/issues/68114): Fix possible PARAMETER_OUT_OF_BOUND error during reading variant subcolumn. [#66659](https://github.com/ClickHouse/ClickHouse/pull/66659) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#67989](https://github.com/ClickHouse/ClickHouse/issues/67989): Validate experimental/suspicious data types in ALTER ADD/MODIFY COLUMN. [#67911](https://github.com/ClickHouse/ClickHouse/pull/67911) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#68335](https://github.com/ClickHouse/ClickHouse/issues/68335): Try fix postgres crash when query is cancelled. [#68288](https://github.com/ClickHouse/ClickHouse/pull/68288) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#68392](https://github.com/ClickHouse/ClickHouse/issues/68392): Fix missing sync replica mode in query `SYSTEM SYNC REPLICA`. [#68326](https://github.com/ClickHouse/ClickHouse/pull/68326) ([Duc Canh Le](https://github.com/canhld94)). + diff --git a/docs/changelogs/v24.5.5.41-stable.md b/docs/changelogs/v24.5.5.41-stable.md new file mode 100644 index 00000000000..8ba160e31d7 --- /dev/null +++ b/docs/changelogs/v24.5.5.41-stable.md @@ -0,0 +1,71 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.5.5.41-stable (441d4a6ebe3) FIXME as compared to v24.5.4.49-stable (63b760955a0) + +#### Improvement +* Backported in [#66768](https://github.com/ClickHouse/ClickHouse/issues/66768): Make allow_experimental_analyzer be controlled by the initiator for distributed queries. This ensures compatibility and correctness during operations in mixed version clusters. [#65777](https://github.com/ClickHouse/ClickHouse/pull/65777) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Backported in [#65350](https://github.com/ClickHouse/ClickHouse/issues/65350): Fix possible abort on uncaught exception in ~WriteBufferFromFileDescriptor in StatusFile. [#64206](https://github.com/ClickHouse/ClickHouse/pull/64206) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#65621](https://github.com/ClickHouse/ClickHouse/issues/65621): Fix `Cannot find column` in distributed query with `ARRAY JOIN` by `Nested` column. Fixes [#64755](https://github.com/ClickHouse/ClickHouse/issues/64755). [#64801](https://github.com/ClickHouse/ClickHouse/pull/64801) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#67902](https://github.com/ClickHouse/ClickHouse/issues/67902): Fixing the `Not-ready Set` error after the `PREWHERE` optimization for StorageMerge. [#65057](https://github.com/ClickHouse/ClickHouse/pull/65057) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66884](https://github.com/ClickHouse/ClickHouse/issues/66884): Fix unexpeced size of low cardinality column in function calls. [#65298](https://github.com/ClickHouse/ClickHouse/pull/65298) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#65933](https://github.com/ClickHouse/ClickHouse/issues/65933): For queries that read from `PostgreSQL`, cancel the internal `PostgreSQL` query if the ClickHouse query is finished. Otherwise, `ClickHouse` query cannot be canceled until the internal `PostgreSQL` query is finished. [#65771](https://github.com/ClickHouse/ClickHouse/pull/65771) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#66301](https://github.com/ClickHouse/ClickHouse/issues/66301): Better handling of join conditions involving `IS NULL` checks (for example `ON (a = b AND (a IS NOT NULL) AND (b IS NOT NULL) ) OR ( (a IS NULL) AND (b IS NULL) )` is rewritten to `ON a <=> b`), fix incorrect optimization when condition other then `IS NULL` are present. [#65835](https://github.com/ClickHouse/ClickHouse/pull/65835) ([vdimir](https://github.com/vdimir)). +* Backported in [#66328](https://github.com/ClickHouse/ClickHouse/issues/66328): Add missing settings `input_format_csv_skip_first_lines/input_format_tsv_skip_first_lines/input_format_csv_try_infer_numbers_from_strings/input_format_csv_try_infer_strings_from_quoted_tuples` in schema inference cache because they can change the resulting schema. It prevents from incorrect result of schema inference with these settings changed. [#65980](https://github.com/ClickHouse/ClickHouse/pull/65980) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#68252](https://github.com/ClickHouse/ClickHouse/issues/68252): Fixed `Not-ready Set` in some system tables when filtering using subqueries. [#66018](https://github.com/ClickHouse/ClickHouse/pull/66018) ([Michael Kolupaev](https://github.com/al13n321)). +* Backported in [#66155](https://github.com/ClickHouse/ClickHouse/issues/66155): Fixed buffer overflow bug in `unbin`/`unhex` implementation. [#66106](https://github.com/ClickHouse/ClickHouse/pull/66106) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#66454](https://github.com/ClickHouse/ClickHouse/issues/66454): Fixed a bug in ZooKeeper client: a session could get stuck in unusable state after receiving a hardware error from ZooKeeper. For example, this might happen due to "soft memory limit" in ClickHouse Keeper. [#66140](https://github.com/ClickHouse/ClickHouse/pull/66140) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#66226](https://github.com/ClickHouse/ClickHouse/issues/66226): Fix issue in SumIfToCountIfVisitor and signed integers. [#66146](https://github.com/ClickHouse/ClickHouse/pull/66146) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#66680](https://github.com/ClickHouse/ClickHouse/issues/66680): Fix handling limit for `system.numbers_mt` when no index can be used. [#66231](https://github.com/ClickHouse/ClickHouse/pull/66231) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#66604](https://github.com/ClickHouse/ClickHouse/issues/66604): Fixed how the ClickHouse server detects the maximum number of usable CPU cores as specified by cgroups v2 if the server runs in a container such as Docker. In more detail, containers often run their process in the root cgroup which has an empty name. In that case, ClickHouse ignored the CPU limits set by cgroups v2. [#66237](https://github.com/ClickHouse/ClickHouse/pull/66237) ([filimonov](https://github.com/filimonov)). +* Backported in [#66360](https://github.com/ClickHouse/ClickHouse/issues/66360): Fix the `Not-ready set` error when a subquery with `IN` is used in the constraint. [#66261](https://github.com/ClickHouse/ClickHouse/pull/66261) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#68064](https://github.com/ClickHouse/ClickHouse/issues/68064): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)). +* Backported in [#68158](https://github.com/ClickHouse/ClickHouse/issues/68158): Fix cluster() for inter-server secret (preserve initial user as before). [#66364](https://github.com/ClickHouse/ClickHouse/pull/66364) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#66972](https://github.com/ClickHouse/ClickHouse/issues/66972): Fix `Column identifier is already registered` error with `group_by_use_nulls=true` and new analyzer. [#66400](https://github.com/ClickHouse/ClickHouse/pull/66400) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66691](https://github.com/ClickHouse/ClickHouse/issues/66691): Fix the VALID UNTIL clause in the user definition resetting after a restart. Closes [#66405](https://github.com/ClickHouse/ClickHouse/issues/66405). [#66409](https://github.com/ClickHouse/ClickHouse/pull/66409) ([Nikolay Degterinsky](https://github.com/evillique)). +* Backported in [#66969](https://github.com/ClickHouse/ClickHouse/issues/66969): Fix `Cannot find column` error for queries with constant expression in `GROUP BY` key and new analyzer enabled. [#66433](https://github.com/ClickHouse/ClickHouse/pull/66433) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66720](https://github.com/ClickHouse/ClickHouse/issues/66720): Correctly track memory for `Allocator::realloc`. [#66548](https://github.com/ClickHouse/ClickHouse/pull/66548) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#66951](https://github.com/ClickHouse/ClickHouse/issues/66951): Fix an invalid result for queries with `WINDOW`. This could happen when `PARTITION` columns have sparse serialization and window functions are executed in parallel. [#66579](https://github.com/ClickHouse/ClickHouse/pull/66579) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66757](https://github.com/ClickHouse/ClickHouse/issues/66757): Fix `Unknown identifier` and `Column is not under aggregate function` errors for queries with the expression `(column IS NULL).` The bug was triggered by [#65088](https://github.com/ClickHouse/ClickHouse/issues/65088), with the disabled analyzer only. [#66654](https://github.com/ClickHouse/ClickHouse/pull/66654) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66948](https://github.com/ClickHouse/ClickHouse/issues/66948): Fix `Method getResultType is not supported for QUERY query node` error when scalar subquery was used as the first argument of IN (with new analyzer). [#66655](https://github.com/ClickHouse/ClickHouse/pull/66655) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#68115](https://github.com/ClickHouse/ClickHouse/issues/68115): Fix possible PARAMETER_OUT_OF_BOUND error during reading variant subcolumn. [#66659](https://github.com/ClickHouse/ClickHouse/pull/66659) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#67633](https://github.com/ClickHouse/ClickHouse/issues/67633): Fix for occasional deadlock in Context::getDDLWorker. [#66843](https://github.com/ClickHouse/ClickHouse/pull/66843) ([Alexander Gololobov](https://github.com/davenger)). +* Backported in [#67481](https://github.com/ClickHouse/ClickHouse/issues/67481): In rare cases ClickHouse could consider parts as broken because of some unexpected projections on disk. Now it's fixed. [#66898](https://github.com/ClickHouse/ClickHouse/pull/66898) ([alesapin](https://github.com/alesapin)). +* Backported in [#67814](https://github.com/ClickHouse/ClickHouse/issues/67814): Only relevant to the experimental Variant data type. Fix crash with Variant + AggregateFunction type. [#67122](https://github.com/ClickHouse/ClickHouse/pull/67122) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#67197](https://github.com/ClickHouse/ClickHouse/issues/67197): TRUNCATE DATABASE used to stop replication as if it was a DROP DATABASE query, it's fixed. [#67129](https://github.com/ClickHouse/ClickHouse/pull/67129) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#67379](https://github.com/ClickHouse/ClickHouse/issues/67379): Fix error `Cannot convert column because it is non constant in source stream but must be constant in result.` for a query that reads from the `Merge` table over the `Distriburted` table with one shard. [#67146](https://github.com/ClickHouse/ClickHouse/pull/67146) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#67501](https://github.com/ClickHouse/ClickHouse/issues/67501): Fix crash in DistributedAsyncInsert when connection is empty. [#67219](https://github.com/ClickHouse/ClickHouse/pull/67219) ([Pablo Marcos](https://github.com/pamarcos)). +* Backported in [#67886](https://github.com/ClickHouse/ClickHouse/issues/67886): Correctly parse file name/URI containing `::` if it's not an archive. [#67433](https://github.com/ClickHouse/ClickHouse/pull/67433) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#67576](https://github.com/ClickHouse/ClickHouse/issues/67576): Fix execution of nested short-circuit functions. [#67520](https://github.com/ClickHouse/ClickHouse/pull/67520) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#67850](https://github.com/ClickHouse/ClickHouse/issues/67850): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#68272](https://github.com/ClickHouse/ClickHouse/issues/68272): Fix inserting into stream like engines (Kafka, RabbitMQ, NATS) through HTTP interface. [#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#67807](https://github.com/ClickHouse/ClickHouse/issues/67807): Fix reloading SQL UDFs with UNION. Previously, restarting the server could make UDF invalid. [#67665](https://github.com/ClickHouse/ClickHouse/pull/67665) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#67836](https://github.com/ClickHouse/ClickHouse/issues/67836): Fix potential stack overflow in `JSONMergePatch` function. Renamed this function from `jsonMergePatch` to `JSONMergePatch` because the previous name was wrong. The previous name is still kept for compatibility. Improved diagnostic of errors in the function. This closes [#67304](https://github.com/ClickHouse/ClickHouse/issues/67304). [#67756](https://github.com/ClickHouse/ClickHouse/pull/67756) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#67991](https://github.com/ClickHouse/ClickHouse/issues/67991): Validate experimental/suspicious data types in ALTER ADD/MODIFY COLUMN. [#67911](https://github.com/ClickHouse/ClickHouse/pull/67911) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#68207](https://github.com/ClickHouse/ClickHouse/issues/68207): Fix wrong `count()` result when there is non-deterministic function in predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#68091](https://github.com/ClickHouse/ClickHouse/issues/68091): Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#68122](https://github.com/ClickHouse/ClickHouse/issues/68122): Fixed skipping of untouched parts in mutations with new analyzer. Previously with enabled analyzer data in part could be rewritten by mutation even if mutation doesn't affect this part according to predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#68171](https://github.com/ClickHouse/ClickHouse/issues/68171): Removes an incorrect optimization to remove sorting in subqueries that use `OFFSET`. Fixes [#67906](https://github.com/ClickHouse/ClickHouse/issues/67906). [#68099](https://github.com/ClickHouse/ClickHouse/pull/68099) ([Graham Campbell](https://github.com/GrahamCampbell)). +* Backported in [#68337](https://github.com/ClickHouse/ClickHouse/issues/68337): Try fix postgres crash when query is cancelled. [#68288](https://github.com/ClickHouse/ClickHouse/pull/68288) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#68667](https://github.com/ClickHouse/ClickHouse/issues/68667): Fix `LOGICAL_ERROR`s when functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. [#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Backported in [#66387](https://github.com/ClickHouse/ClickHouse/issues/66387): Disable broken cases from 02911_join_on_nullsafe_optimization. [#66310](https://github.com/ClickHouse/ClickHouse/pull/66310) ([vdimir](https://github.com/vdimir)). +* Backported in [#66426](https://github.com/ClickHouse/ClickHouse/issues/66426): Ignore subquery for IN in DDLLoadingDependencyVisitor. [#66395](https://github.com/ClickHouse/ClickHouse/pull/66395) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66544](https://github.com/ClickHouse/ClickHouse/issues/66544): Add additional log masking in CI. [#66523](https://github.com/ClickHouse/ClickHouse/pull/66523) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#66859](https://github.com/ClickHouse/ClickHouse/issues/66859): Fix data race in S3::ClientCache. [#66644](https://github.com/ClickHouse/ClickHouse/pull/66644) ([Konstantin Morozov](https://github.com/k-morozov)). +* Backported in [#66875](https://github.com/ClickHouse/ClickHouse/issues/66875): Support one more case in JOIN ON ... IS NULL. [#66725](https://github.com/ClickHouse/ClickHouse/pull/66725) ([vdimir](https://github.com/vdimir)). +* Backported in [#67059](https://github.com/ClickHouse/ClickHouse/issues/67059): Increase asio pool size in case the server is tiny. [#66761](https://github.com/ClickHouse/ClickHouse/pull/66761) ([alesapin](https://github.com/alesapin)). +* Backported in [#66945](https://github.com/ClickHouse/ClickHouse/issues/66945): Small fix in realloc memory tracking. [#66820](https://github.com/ClickHouse/ClickHouse/pull/66820) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#67252](https://github.com/ClickHouse/ClickHouse/issues/67252): Followup [#66725](https://github.com/ClickHouse/ClickHouse/issues/66725). [#66869](https://github.com/ClickHouse/ClickHouse/pull/66869) ([vdimir](https://github.com/vdimir)). +* Backported in [#67412](https://github.com/ClickHouse/ClickHouse/issues/67412): CI: Fix build results for release branches. [#67402](https://github.com/ClickHouse/ClickHouse/pull/67402) ([Max K.](https://github.com/maxknv)). +* Update version after release. [#67862](https://github.com/ClickHouse/ClickHouse/pull/67862) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Backported in [#68077](https://github.com/ClickHouse/ClickHouse/issues/68077): Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. [#67953](https://github.com/ClickHouse/ClickHouse/pull/67953) ([pufit](https://github.com/pufit)). + diff --git a/docs/changelogs/v24.5.6.45-stable.md b/docs/changelogs/v24.5.6.45-stable.md new file mode 100644 index 00000000000..b329ebab27b --- /dev/null +++ b/docs/changelogs/v24.5.6.45-stable.md @@ -0,0 +1,33 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.5.6.45-stable (bdca8604c29) FIXME as compared to v24.5.5.78-stable (0138248cb62) + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Backported in [#67902](https://github.com/ClickHouse/ClickHouse/issues/67902): Fixing the `Not-ready Set` error after the `PREWHERE` optimization for StorageMerge. [#65057](https://github.com/ClickHouse/ClickHouse/pull/65057) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#68252](https://github.com/ClickHouse/ClickHouse/issues/68252): Fixed `Not-ready Set` in some system tables when filtering using subqueries. [#66018](https://github.com/ClickHouse/ClickHouse/pull/66018) ([Michael Kolupaev](https://github.com/al13n321)). +* Backported in [#68064](https://github.com/ClickHouse/ClickHouse/issues/68064): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)). +* Backported in [#68158](https://github.com/ClickHouse/ClickHouse/issues/68158): Fix cluster() for inter-server secret (preserve initial user as before). [#66364](https://github.com/ClickHouse/ClickHouse/pull/66364) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#68115](https://github.com/ClickHouse/ClickHouse/issues/68115): Fix possible PARAMETER_OUT_OF_BOUND error during reading variant subcolumn. [#66659](https://github.com/ClickHouse/ClickHouse/pull/66659) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#67886](https://github.com/ClickHouse/ClickHouse/issues/67886): Correctly parse file name/URI containing `::` if it's not an archive. [#67433](https://github.com/ClickHouse/ClickHouse/pull/67433) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#68272](https://github.com/ClickHouse/ClickHouse/issues/68272): Fix inserting into stream like engines (Kafka, RabbitMQ, NATS) through HTTP interface. [#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#67807](https://github.com/ClickHouse/ClickHouse/issues/67807): Fix reloading SQL UDFs with UNION. Previously, restarting the server could make UDF invalid. [#67665](https://github.com/ClickHouse/ClickHouse/pull/67665) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#67836](https://github.com/ClickHouse/ClickHouse/issues/67836): Fix potential stack overflow in `JSONMergePatch` function. Renamed this function from `jsonMergePatch` to `JSONMergePatch` because the previous name was wrong. The previous name is still kept for compatibility. Improved diagnostic of errors in the function. This closes [#67304](https://github.com/ClickHouse/ClickHouse/issues/67304). [#67756](https://github.com/ClickHouse/ClickHouse/pull/67756) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#67991](https://github.com/ClickHouse/ClickHouse/issues/67991): Validate experimental/suspicious data types in ALTER ADD/MODIFY COLUMN. [#67911](https://github.com/ClickHouse/ClickHouse/pull/67911) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#68207](https://github.com/ClickHouse/ClickHouse/issues/68207): Fix wrong `count()` result when there is non-deterministic function in predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#68091](https://github.com/ClickHouse/ClickHouse/issues/68091): Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#68122](https://github.com/ClickHouse/ClickHouse/issues/68122): Fixed skipping of untouched parts in mutations with new analyzer. Previously with enabled analyzer data in part could be rewritten by mutation even if mutation doesn't affect this part according to predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#68171](https://github.com/ClickHouse/ClickHouse/issues/68171): Removes an incorrect optimization to remove sorting in subqueries that use `OFFSET`. Fixes [#67906](https://github.com/ClickHouse/ClickHouse/issues/67906). [#68099](https://github.com/ClickHouse/ClickHouse/pull/68099) ([Graham Campbell](https://github.com/GrahamCampbell)). +* Backported in [#68337](https://github.com/ClickHouse/ClickHouse/issues/68337): Try fix postgres crash when query is cancelled. [#68288](https://github.com/ClickHouse/ClickHouse/pull/68288) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#68667](https://github.com/ClickHouse/ClickHouse/issues/68667): Fix `LOGICAL_ERROR`s when functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. [#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Update version after release. [#67862](https://github.com/ClickHouse/ClickHouse/pull/67862) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Backported in [#68077](https://github.com/ClickHouse/ClickHouse/issues/68077): Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. [#67953](https://github.com/ClickHouse/ClickHouse/pull/67953) ([pufit](https://github.com/pufit)). +* Backported in [#68756](https://github.com/ClickHouse/ClickHouse/issues/68756): To make patch release possible from every commit on release branch, package_debug build is required and must not be skipped. [#68750](https://github.com/ClickHouse/ClickHouse/pull/68750) ([Max K.](https://github.com/maxknv)). + diff --git a/docs/changelogs/v24.6.3.38-stable.md b/docs/changelogs/v24.6.3.38-stable.md new file mode 100644 index 00000000000..01d7e26e31f --- /dev/null +++ b/docs/changelogs/v24.6.3.38-stable.md @@ -0,0 +1,83 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.6.3.38-stable (4e33c831589) FIXME as compared to v24.6.2.17-stable (5710a8b5c0c) + +#### Improvement +* Backported in [#66770](https://github.com/ClickHouse/ClickHouse/issues/66770): Make allow_experimental_analyzer be controlled by the initiator for distributed queries. This ensures compatibility and correctness during operations in mixed version clusters. [#65777](https://github.com/ClickHouse/ClickHouse/pull/65777) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Backported in [#66885](https://github.com/ClickHouse/ClickHouse/issues/66885): Fix unexpeced size of low cardinality column in function calls. [#65298](https://github.com/ClickHouse/ClickHouse/pull/65298) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#66303](https://github.com/ClickHouse/ClickHouse/issues/66303): Better handling of join conditions involving `IS NULL` checks (for example `ON (a = b AND (a IS NOT NULL) AND (b IS NOT NULL) ) OR ( (a IS NULL) AND (b IS NULL) )` is rewritten to `ON a <=> b`), fix incorrect optimization when condition other then `IS NULL` are present. [#65835](https://github.com/ClickHouse/ClickHouse/pull/65835) ([vdimir](https://github.com/vdimir)). +* Backported in [#66330](https://github.com/ClickHouse/ClickHouse/issues/66330): Add missing settings `input_format_csv_skip_first_lines/input_format_tsv_skip_first_lines/input_format_csv_try_infer_numbers_from_strings/input_format_csv_try_infer_strings_from_quoted_tuples` in schema inference cache because they can change the resulting schema. It prevents from incorrect result of schema inference with these settings changed. [#65980](https://github.com/ClickHouse/ClickHouse/pull/65980) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#66157](https://github.com/ClickHouse/ClickHouse/issues/66157): Fixed buffer overflow bug in `unbin`/`unhex` implementation. [#66106](https://github.com/ClickHouse/ClickHouse/pull/66106) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#66210](https://github.com/ClickHouse/ClickHouse/issues/66210): Disable the `merge-filters` optimization introduced in [#64760](https://github.com/ClickHouse/ClickHouse/issues/64760). It may cause an exception if optimization merges two filter expressions and does not apply a short-circuit evaluation. [#66126](https://github.com/ClickHouse/ClickHouse/pull/66126) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66456](https://github.com/ClickHouse/ClickHouse/issues/66456): Fixed a bug in ZooKeeper client: a session could get stuck in unusable state after receiving a hardware error from ZooKeeper. For example, this might happen due to "soft memory limit" in ClickHouse Keeper. [#66140](https://github.com/ClickHouse/ClickHouse/pull/66140) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#66228](https://github.com/ClickHouse/ClickHouse/issues/66228): Fix issue in SumIfToCountIfVisitor and signed integers. [#66146](https://github.com/ClickHouse/ClickHouse/pull/66146) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#66183](https://github.com/ClickHouse/ClickHouse/issues/66183): Fix rare case with missing data in the result of distributed query, close [#61432](https://github.com/ClickHouse/ClickHouse/issues/61432). [#66174](https://github.com/ClickHouse/ClickHouse/pull/66174) ([vdimir](https://github.com/vdimir)). +* Backported in [#66271](https://github.com/ClickHouse/ClickHouse/issues/66271): Don't throw `TIMEOUT_EXCEEDED` for `none_only_active` mode of `distributed_ddl_output_mode`. [#66218](https://github.com/ClickHouse/ClickHouse/pull/66218) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#66682](https://github.com/ClickHouse/ClickHouse/issues/66682): Fix handling limit for `system.numbers_mt` when no index can be used. [#66231](https://github.com/ClickHouse/ClickHouse/pull/66231) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#66587](https://github.com/ClickHouse/ClickHouse/issues/66587): Fixed how the ClickHouse server detects the maximum number of usable CPU cores as specified by cgroups v2 if the server runs in a container such as Docker. In more detail, containers often run their process in the root cgroup which has an empty name. In that case, ClickHouse ignored the CPU limits set by cgroups v2. [#66237](https://github.com/ClickHouse/ClickHouse/pull/66237) ([filimonov](https://github.com/filimonov)). +* Backported in [#66362](https://github.com/ClickHouse/ClickHouse/issues/66362): Fix the `Not-ready set` error when a subquery with `IN` is used in the constraint. [#66261](https://github.com/ClickHouse/ClickHouse/pull/66261) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#68066](https://github.com/ClickHouse/ClickHouse/issues/68066): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)). +* Backported in [#68566](https://github.com/ClickHouse/ClickHouse/issues/68566): Fix indexHint function case found by fuzzer. [#66286](https://github.com/ClickHouse/ClickHouse/pull/66286) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#68159](https://github.com/ClickHouse/ClickHouse/issues/68159): Fix cluster() for inter-server secret (preserve initial user as before). [#66364](https://github.com/ClickHouse/ClickHouse/pull/66364) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#66613](https://github.com/ClickHouse/ClickHouse/issues/66613): Fix `Column identifier is already registered` error with `group_by_use_nulls=true` and new analyzer. [#66400](https://github.com/ClickHouse/ClickHouse/pull/66400) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66693](https://github.com/ClickHouse/ClickHouse/issues/66693): Fix the VALID UNTIL clause in the user definition resetting after a restart. Closes [#66405](https://github.com/ClickHouse/ClickHouse/issues/66405). [#66409](https://github.com/ClickHouse/ClickHouse/pull/66409) ([Nikolay Degterinsky](https://github.com/evillique)). +* Backported in [#66577](https://github.com/ClickHouse/ClickHouse/issues/66577): Fix `Cannot find column` error for queries with constant expression in `GROUP BY` key and new analyzer enabled. [#66433](https://github.com/ClickHouse/ClickHouse/pull/66433) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66721](https://github.com/ClickHouse/ClickHouse/issues/66721): Correctly track memory for `Allocator::realloc`. [#66548](https://github.com/ClickHouse/ClickHouse/pull/66548) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#66670](https://github.com/ClickHouse/ClickHouse/issues/66670): Fix reading of uninitialized memory when hashing empty tuples. This closes [#66559](https://github.com/ClickHouse/ClickHouse/issues/66559). [#66562](https://github.com/ClickHouse/ClickHouse/pull/66562) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#66952](https://github.com/ClickHouse/ClickHouse/issues/66952): Fix an invalid result for queries with `WINDOW`. This could happen when `PARTITION` columns have sparse serialization and window functions are executed in parallel. [#66579](https://github.com/ClickHouse/ClickHouse/pull/66579) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66956](https://github.com/ClickHouse/ClickHouse/issues/66956): Fix removing named collections in local storage. [#66599](https://github.com/ClickHouse/ClickHouse/pull/66599) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#66716](https://github.com/ClickHouse/ClickHouse/issues/66716): Fix removing named collections in local storage. [#66599](https://github.com/ClickHouse/ClickHouse/pull/66599) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#66759](https://github.com/ClickHouse/ClickHouse/issues/66759): Fix `Unknown identifier` and `Column is not under aggregate function` errors for queries with the expression `(column IS NULL).` The bug was triggered by [#65088](https://github.com/ClickHouse/ClickHouse/issues/65088), with the disabled analyzer only. [#66654](https://github.com/ClickHouse/ClickHouse/pull/66654) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66751](https://github.com/ClickHouse/ClickHouse/issues/66751): Fix `Method getResultType is not supported for QUERY query node` error when scalar subquery was used as the first argument of IN (with new analyzer). [#66655](https://github.com/ClickHouse/ClickHouse/pull/66655) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#68116](https://github.com/ClickHouse/ClickHouse/issues/68116): Fix possible PARAMETER_OUT_OF_BOUND error during reading variant subcolumn. [#66659](https://github.com/ClickHouse/ClickHouse/pull/66659) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#67635](https://github.com/ClickHouse/ClickHouse/issues/67635): Fix for occasional deadlock in Context::getDDLWorker. [#66843](https://github.com/ClickHouse/ClickHouse/pull/66843) ([Alexander Gololobov](https://github.com/davenger)). +* Backported in [#67482](https://github.com/ClickHouse/ClickHouse/issues/67482): In rare cases ClickHouse could consider parts as broken because of some unexpected projections on disk. Now it's fixed. [#66898](https://github.com/ClickHouse/ClickHouse/pull/66898) ([alesapin](https://github.com/alesapin)). +* Backported in [#67816](https://github.com/ClickHouse/ClickHouse/issues/67816): Only relevant to the experimental Variant data type. Fix crash with Variant + AggregateFunction type. [#67122](https://github.com/ClickHouse/ClickHouse/pull/67122) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#67199](https://github.com/ClickHouse/ClickHouse/issues/67199): TRUNCATE DATABASE used to stop replication as if it was a DROP DATABASE query, it's fixed. [#67129](https://github.com/ClickHouse/ClickHouse/pull/67129) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#67381](https://github.com/ClickHouse/ClickHouse/issues/67381): Fix error `Cannot convert column because it is non constant in source stream but must be constant in result.` for a query that reads from the `Merge` table over the `Distriburted` table with one shard. [#67146](https://github.com/ClickHouse/ClickHouse/pull/67146) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#67244](https://github.com/ClickHouse/ClickHouse/issues/67244): This closes [#67156](https://github.com/ClickHouse/ClickHouse/issues/67156). This closes [#66447](https://github.com/ClickHouse/ClickHouse/issues/66447). The bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/62907. [#67178](https://github.com/ClickHouse/ClickHouse/pull/67178) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#67503](https://github.com/ClickHouse/ClickHouse/issues/67503): Fix crash in DistributedAsyncInsert when connection is empty. [#67219](https://github.com/ClickHouse/ClickHouse/pull/67219) ([Pablo Marcos](https://github.com/pamarcos)). +* Backported in [#67887](https://github.com/ClickHouse/ClickHouse/issues/67887): Correctly parse file name/URI containing `::` if it's not an archive. [#67433](https://github.com/ClickHouse/ClickHouse/pull/67433) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#67578](https://github.com/ClickHouse/ClickHouse/issues/67578): Fix execution of nested short-circuit functions. [#67520](https://github.com/ClickHouse/ClickHouse/pull/67520) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#68611](https://github.com/ClickHouse/ClickHouse/issues/68611): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#67852](https://github.com/ClickHouse/ClickHouse/issues/67852): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#68275](https://github.com/ClickHouse/ClickHouse/issues/68275): Fix inserting into stream like engines (Kafka, RabbitMQ, NATS) through HTTP interface. [#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#67808](https://github.com/ClickHouse/ClickHouse/issues/67808): Fix reloading SQL UDFs with UNION. Previously, restarting the server could make UDF invalid. [#67665](https://github.com/ClickHouse/ClickHouse/pull/67665) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#67838](https://github.com/ClickHouse/ClickHouse/issues/67838): Fix potential stack overflow in `JSONMergePatch` function. Renamed this function from `jsonMergePatch` to `JSONMergePatch` because the previous name was wrong. The previous name is still kept for compatibility. Improved diagnostic of errors in the function. This closes [#67304](https://github.com/ClickHouse/ClickHouse/issues/67304). [#67756](https://github.com/ClickHouse/ClickHouse/pull/67756) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#67993](https://github.com/ClickHouse/ClickHouse/issues/67993): Validate experimental/suspicious data types in ALTER ADD/MODIFY COLUMN. [#67911](https://github.com/ClickHouse/ClickHouse/pull/67911) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#68208](https://github.com/ClickHouse/ClickHouse/issues/68208): Fix wrong `count()` result when there is non-deterministic function in predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#68093](https://github.com/ClickHouse/ClickHouse/issues/68093): Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#68124](https://github.com/ClickHouse/ClickHouse/issues/68124): Fixed skipping of untouched parts in mutations with new analyzer. Previously with enabled analyzer data in part could be rewritten by mutation even if mutation doesn't affect this part according to predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#68221](https://github.com/ClickHouse/ClickHouse/issues/68221): Fixed a NULL pointer dereference, triggered by a specially crafted query, that crashed the server via hopEnd, hopStart, tumbleEnd, and tumbleStart. [#68098](https://github.com/ClickHouse/ClickHouse/pull/68098) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Backported in [#68173](https://github.com/ClickHouse/ClickHouse/issues/68173): Removes an incorrect optimization to remove sorting in subqueries that use `OFFSET`. Fixes [#67906](https://github.com/ClickHouse/ClickHouse/issues/67906). [#68099](https://github.com/ClickHouse/ClickHouse/pull/68099) ([Graham Campbell](https://github.com/GrahamCampbell)). +* Backported in [#68339](https://github.com/ClickHouse/ClickHouse/issues/68339): Try fix postgres crash when query is cancelled. [#68288](https://github.com/ClickHouse/ClickHouse/pull/68288) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#68396](https://github.com/ClickHouse/ClickHouse/issues/68396): Fix missing sync replica mode in query `SYSTEM SYNC REPLICA`. [#68326](https://github.com/ClickHouse/ClickHouse/pull/68326) ([Duc Canh Le](https://github.com/canhld94)). +* Backported in [#68668](https://github.com/ClickHouse/ClickHouse/issues/68668): Fix `LOGICAL_ERROR`s when functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. [#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Backport [#66599](https://github.com/ClickHouse/ClickHouse/issues/66599) to 24.6: Fix dropping named collection in local storage"'. [#66922](https://github.com/ClickHouse/ClickHouse/pull/66922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Backported in [#66332](https://github.com/ClickHouse/ClickHouse/issues/66332): Do not raise a NOT_IMPLEMENTED error when getting s3 metrics with a multiple disk configuration. [#65403](https://github.com/ClickHouse/ClickHouse/pull/65403) ([Elena Torró](https://github.com/elenatorro)). +* Backported in [#66142](https://github.com/ClickHouse/ClickHouse/issues/66142): Fix flaky test_storage_s3_queue tests. [#66009](https://github.com/ClickHouse/ClickHouse/pull/66009) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#66389](https://github.com/ClickHouse/ClickHouse/issues/66389): Disable broken cases from 02911_join_on_nullsafe_optimization. [#66310](https://github.com/ClickHouse/ClickHouse/pull/66310) ([vdimir](https://github.com/vdimir)). +* Backported in [#66428](https://github.com/ClickHouse/ClickHouse/issues/66428): Ignore subquery for IN in DDLLoadingDependencyVisitor. [#66395](https://github.com/ClickHouse/ClickHouse/pull/66395) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66546](https://github.com/ClickHouse/ClickHouse/issues/66546): Add additional log masking in CI. [#66523](https://github.com/ClickHouse/ClickHouse/pull/66523) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#66861](https://github.com/ClickHouse/ClickHouse/issues/66861): Fix data race in S3::ClientCache. [#66644](https://github.com/ClickHouse/ClickHouse/pull/66644) ([Konstantin Morozov](https://github.com/k-morozov)). +* Backported in [#66877](https://github.com/ClickHouse/ClickHouse/issues/66877): Support one more case in JOIN ON ... IS NULL. [#66725](https://github.com/ClickHouse/ClickHouse/pull/66725) ([vdimir](https://github.com/vdimir)). +* Backported in [#67061](https://github.com/ClickHouse/ClickHouse/issues/67061): Increase asio pool size in case the server is tiny. [#66761](https://github.com/ClickHouse/ClickHouse/pull/66761) ([alesapin](https://github.com/alesapin)). +* Backported in [#66940](https://github.com/ClickHouse/ClickHouse/issues/66940): Small fix in realloc memory tracking. [#66820](https://github.com/ClickHouse/ClickHouse/pull/66820) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#67254](https://github.com/ClickHouse/ClickHouse/issues/67254): Followup [#66725](https://github.com/ClickHouse/ClickHouse/issues/66725). [#66869](https://github.com/ClickHouse/ClickHouse/pull/66869) ([vdimir](https://github.com/vdimir)). +* Backported in [#67414](https://github.com/ClickHouse/ClickHouse/issues/67414): CI: Fix build results for release branches. [#67402](https://github.com/ClickHouse/ClickHouse/pull/67402) ([Max K.](https://github.com/maxknv)). +* Update version after release. [#67909](https://github.com/ClickHouse/ClickHouse/pull/67909) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Backported in [#68079](https://github.com/ClickHouse/ClickHouse/issues/68079): Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. [#67953](https://github.com/ClickHouse/ClickHouse/pull/67953) ([pufit](https://github.com/pufit)). + diff --git a/docs/changelogs/v24.6.4.42-stable.md b/docs/changelogs/v24.6.4.42-stable.md new file mode 100644 index 00000000000..29b6ba095af --- /dev/null +++ b/docs/changelogs/v24.6.4.42-stable.md @@ -0,0 +1,33 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.6.4.42-stable (c534bb4b4dd) FIXME as compared to v24.6.3.95-stable (8325c920d11) + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Backported in [#68066](https://github.com/ClickHouse/ClickHouse/issues/68066): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)). +* Backported in [#68566](https://github.com/ClickHouse/ClickHouse/issues/68566): Fix indexHint function case found by fuzzer. [#66286](https://github.com/ClickHouse/ClickHouse/pull/66286) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#68159](https://github.com/ClickHouse/ClickHouse/issues/68159): Fix cluster() for inter-server secret (preserve initial user as before). [#66364](https://github.com/ClickHouse/ClickHouse/pull/66364) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#68116](https://github.com/ClickHouse/ClickHouse/issues/68116): Fix possible PARAMETER_OUT_OF_BOUND error during reading variant subcolumn. [#66659](https://github.com/ClickHouse/ClickHouse/pull/66659) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#67887](https://github.com/ClickHouse/ClickHouse/issues/67887): Correctly parse file name/URI containing `::` if it's not an archive. [#67433](https://github.com/ClickHouse/ClickHouse/pull/67433) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#68611](https://github.com/ClickHouse/ClickHouse/issues/68611): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#68275](https://github.com/ClickHouse/ClickHouse/issues/68275): Fix inserting into stream like engines (Kafka, RabbitMQ, NATS) through HTTP interface. [#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#67993](https://github.com/ClickHouse/ClickHouse/issues/67993): Validate experimental/suspicious data types in ALTER ADD/MODIFY COLUMN. [#67911](https://github.com/ClickHouse/ClickHouse/pull/67911) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#68208](https://github.com/ClickHouse/ClickHouse/issues/68208): Fix wrong `count()` result when there is non-deterministic function in predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#68093](https://github.com/ClickHouse/ClickHouse/issues/68093): Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#68124](https://github.com/ClickHouse/ClickHouse/issues/68124): Fixed skipping of untouched parts in mutations with new analyzer. Previously with enabled analyzer data in part could be rewritten by mutation even if mutation doesn't affect this part according to predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#68221](https://github.com/ClickHouse/ClickHouse/issues/68221): Fixed a NULL pointer dereference, triggered by a specially crafted query, that crashed the server via hopEnd, hopStart, tumbleEnd, and tumbleStart. [#68098](https://github.com/ClickHouse/ClickHouse/pull/68098) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Backported in [#68173](https://github.com/ClickHouse/ClickHouse/issues/68173): Removes an incorrect optimization to remove sorting in subqueries that use `OFFSET`. Fixes [#67906](https://github.com/ClickHouse/ClickHouse/issues/67906). [#68099](https://github.com/ClickHouse/ClickHouse/pull/68099) ([Graham Campbell](https://github.com/GrahamCampbell)). +* Backported in [#68339](https://github.com/ClickHouse/ClickHouse/issues/68339): Try fix postgres crash when query is cancelled. [#68288](https://github.com/ClickHouse/ClickHouse/pull/68288) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#68396](https://github.com/ClickHouse/ClickHouse/issues/68396): Fix missing sync replica mode in query `SYSTEM SYNC REPLICA`. [#68326](https://github.com/ClickHouse/ClickHouse/pull/68326) ([Duc Canh Le](https://github.com/canhld94)). +* Backported in [#68668](https://github.com/ClickHouse/ClickHouse/issues/68668): Fix `LOGICAL_ERROR`s when functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. [#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Update version after release. [#67909](https://github.com/ClickHouse/ClickHouse/pull/67909) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Backported in [#68079](https://github.com/ClickHouse/ClickHouse/issues/68079): Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. [#67953](https://github.com/ClickHouse/ClickHouse/pull/67953) ([pufit](https://github.com/pufit)). +* Backported in [#68758](https://github.com/ClickHouse/ClickHouse/issues/68758): To make patch release possible from every commit on release branch, package_debug build is required and must not be skipped. [#68750](https://github.com/ClickHouse/ClickHouse/pull/68750) ([Max K.](https://github.com/maxknv)). + diff --git a/docs/changelogs/v24.7.3.42-stable.md b/docs/changelogs/v24.7.3.42-stable.md new file mode 100644 index 00000000000..48f6e301f3c --- /dev/null +++ b/docs/changelogs/v24.7.3.42-stable.md @@ -0,0 +1,37 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.7.3.42-stable (63730bc4293) FIXME as compared to v24.7.2.13-stable (6e41f601b2f) + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Backported in [#67969](https://github.com/ClickHouse/ClickHouse/issues/67969): Fixed reading of subcolumns after `ALTER ADD COLUMN` query. [#66243](https://github.com/ClickHouse/ClickHouse/pull/66243) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#67637](https://github.com/ClickHouse/ClickHouse/issues/67637): Fix for occasional deadlock in Context::getDDLWorker. [#66843](https://github.com/ClickHouse/ClickHouse/pull/66843) ([Alexander Gololobov](https://github.com/davenger)). +* Backported in [#67820](https://github.com/ClickHouse/ClickHouse/issues/67820): Fix possible deadlock on query cancel with parallel replicas. [#66905](https://github.com/ClickHouse/ClickHouse/pull/66905) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#67881](https://github.com/ClickHouse/ClickHouse/issues/67881): Correctly parse file name/URI containing `::` if it's not an archive. [#67433](https://github.com/ClickHouse/ClickHouse/pull/67433) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#67713](https://github.com/ClickHouse/ClickHouse/issues/67713): Fix reloading SQL UDFs with UNION. Previously, restarting the server could make UDF invalid. [#67665](https://github.com/ClickHouse/ClickHouse/pull/67665) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#67995](https://github.com/ClickHouse/ClickHouse/issues/67995): Validate experimental/suspicious data types in ALTER ADD/MODIFY COLUMN. [#67911](https://github.com/ClickHouse/ClickHouse/pull/67911) ([Kruglov Pavel](https://github.com/Avogar)). + +#### Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC) + +* Backported in [#67818](https://github.com/ClickHouse/ClickHouse/issues/67818): Only relevant to the experimental Variant data type. Fix crash with Variant + AggregateFunction type. [#67122](https://github.com/ClickHouse/ClickHouse/pull/67122) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#67766](https://github.com/ClickHouse/ClickHouse/issues/67766): Fix crash of `uniq` and `uniqTheta ` with `tuple()` argument. Closes [#67303](https://github.com/ClickHouse/ClickHouse/issues/67303). [#67306](https://github.com/ClickHouse/ClickHouse/pull/67306) ([flynn](https://github.com/ucasfl)). +* Backported in [#67854](https://github.com/ClickHouse/ClickHouse/issues/67854): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#67840](https://github.com/ClickHouse/ClickHouse/issues/67840): Fix potential stack overflow in `JSONMergePatch` function. Renamed this function from `jsonMergePatch` to `JSONMergePatch` because the previous name was wrong. The previous name is still kept for compatibility. Improved diagnostic of errors in the function. This closes [#67304](https://github.com/ClickHouse/ClickHouse/issues/67304). [#67756](https://github.com/ClickHouse/ClickHouse/pull/67756) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Backported in [#67518](https://github.com/ClickHouse/ClickHouse/issues/67518): Split slow test 03036_dynamic_read_subcolumns. [#66954](https://github.com/ClickHouse/ClickHouse/pull/66954) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#67516](https://github.com/ClickHouse/ClickHouse/issues/67516): Split 01508_partition_pruning_long. [#66983](https://github.com/ClickHouse/ClickHouse/pull/66983) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#67529](https://github.com/ClickHouse/ClickHouse/issues/67529): Reduce max time of 00763_long_lock_buffer_alter_destination_table. [#67185](https://github.com/ClickHouse/ClickHouse/pull/67185) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#67643](https://github.com/ClickHouse/ClickHouse/issues/67643): [Green CI] Fix potentially flaky test_mask_sensitive_info integration test. [#67506](https://github.com/ClickHouse/ClickHouse/pull/67506) ([Alexey Katsman](https://github.com/alexkats)). +* Backported in [#67609](https://github.com/ClickHouse/ClickHouse/issues/67609): Fix test_zookeeper_config_load_balancing after adding the xdist worker name to the instance. [#67590](https://github.com/ClickHouse/ClickHouse/pull/67590) ([Pablo Marcos](https://github.com/pamarcos)). +* Backported in [#67871](https://github.com/ClickHouse/ClickHouse/issues/67871): Fix 02434_cancel_insert_when_client_dies. [#67600](https://github.com/ClickHouse/ClickHouse/pull/67600) ([vdimir](https://github.com/vdimir)). +* Backported in [#67704](https://github.com/ClickHouse/ClickHouse/issues/67704): Fix 02910_bad_logs_level_in_local in fast tests. [#67603](https://github.com/ClickHouse/ClickHouse/pull/67603) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#67689](https://github.com/ClickHouse/ClickHouse/issues/67689): Fix 01605_adaptive_granularity_block_borders. [#67605](https://github.com/ClickHouse/ClickHouse/pull/67605) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#67827](https://github.com/ClickHouse/ClickHouse/issues/67827): Try fix 03143_asof_join_ddb_long. [#67620](https://github.com/ClickHouse/ClickHouse/pull/67620) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#67892](https://github.com/ClickHouse/ClickHouse/issues/67892): Revert "Merge pull request [#66510](https://github.com/ClickHouse/ClickHouse/issues/66510) from canhld94/fix_trivial_count_non_deterministic_func". [#67800](https://github.com/ClickHouse/ClickHouse/pull/67800) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). + diff --git a/docs/changelogs/v24.7.3.47-stable.md b/docs/changelogs/v24.7.3.47-stable.md new file mode 100644 index 00000000000..e5f23a70fe1 --- /dev/null +++ b/docs/changelogs/v24.7.3.47-stable.md @@ -0,0 +1,55 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.7.3.47-stable (2e50fe27a14) FIXME as compared to v24.7.2.13-stable (6e41f601b2f) + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Backported in [#68232](https://github.com/ClickHouse/ClickHouse/issues/68232): Fixed `Not-ready Set` in some system tables when filtering using subqueries. [#66018](https://github.com/ClickHouse/ClickHouse/pull/66018) ([Michael Kolupaev](https://github.com/al13n321)). +* Backported in [#67969](https://github.com/ClickHouse/ClickHouse/issues/67969): Fixed reading of subcolumns after `ALTER ADD COLUMN` query. [#66243](https://github.com/ClickHouse/ClickHouse/pull/66243) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#68068](https://github.com/ClickHouse/ClickHouse/issues/68068): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)). +* Backported in [#67637](https://github.com/ClickHouse/ClickHouse/issues/67637): Fix for occasional deadlock in Context::getDDLWorker. [#66843](https://github.com/ClickHouse/ClickHouse/pull/66843) ([Alexander Gololobov](https://github.com/davenger)). +* Backported in [#67820](https://github.com/ClickHouse/ClickHouse/issues/67820): Fix possible deadlock on query cancel with parallel replicas. [#66905](https://github.com/ClickHouse/ClickHouse/pull/66905) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#67818](https://github.com/ClickHouse/ClickHouse/issues/67818): Only relevant to the experimental Variant data type. Fix crash with Variant + AggregateFunction type. [#67122](https://github.com/ClickHouse/ClickHouse/pull/67122) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#67766](https://github.com/ClickHouse/ClickHouse/issues/67766): Fix crash of `uniq` and `uniqTheta ` with `tuple()` argument. Closes [#67303](https://github.com/ClickHouse/ClickHouse/issues/67303). [#67306](https://github.com/ClickHouse/ClickHouse/pull/67306) ([flynn](https://github.com/ucasfl)). +* Backported in [#67881](https://github.com/ClickHouse/ClickHouse/issues/67881): Correctly parse file name/URI containing `::` if it's not an archive. [#67433](https://github.com/ClickHouse/ClickHouse/pull/67433) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#68613](https://github.com/ClickHouse/ClickHouse/issues/68613): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#67854](https://github.com/ClickHouse/ClickHouse/issues/67854): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#68278](https://github.com/ClickHouse/ClickHouse/issues/68278): Fix inserting into stream like engines (Kafka, RabbitMQ, NATS) through HTTP interface. [#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#68040](https://github.com/ClickHouse/ClickHouse/issues/68040): Fix creation of view with recursive CTE. [#67587](https://github.com/ClickHouse/ClickHouse/pull/67587) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Backported in [#68038](https://github.com/ClickHouse/ClickHouse/issues/68038): Fix crash on `percent_rank`. `percent_rank`'s default frame type is changed to `range unbounded preceding and unbounded following`. `IWindowFunction`'s default window frame is considered and now window functions without window frame definition in sql can be put into different `WindowTransfomer`s properly. [#67661](https://github.com/ClickHouse/ClickHouse/pull/67661) ([lgbo](https://github.com/lgbo-ustc)). +* Backported in [#67713](https://github.com/ClickHouse/ClickHouse/issues/67713): Fix reloading SQL UDFs with UNION. Previously, restarting the server could make UDF invalid. [#67665](https://github.com/ClickHouse/ClickHouse/pull/67665) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#67840](https://github.com/ClickHouse/ClickHouse/issues/67840): Fix potential stack overflow in `JSONMergePatch` function. Renamed this function from `jsonMergePatch` to `JSONMergePatch` because the previous name was wrong. The previous name is still kept for compatibility. Improved diagnostic of errors in the function. This closes [#67304](https://github.com/ClickHouse/ClickHouse/issues/67304). [#67756](https://github.com/ClickHouse/ClickHouse/pull/67756) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#67995](https://github.com/ClickHouse/ClickHouse/issues/67995): Validate experimental/suspicious data types in ALTER ADD/MODIFY COLUMN. [#67911](https://github.com/ClickHouse/ClickHouse/pull/67911) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#68224](https://github.com/ClickHouse/ClickHouse/issues/68224): Fix wrong `count()` result when there is non-deterministic function in predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#68095](https://github.com/ClickHouse/ClickHouse/issues/68095): Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#68126](https://github.com/ClickHouse/ClickHouse/issues/68126): Fixed skipping of untouched parts in mutations with new analyzer. Previously with enabled analyzer data in part could be rewritten by mutation even if mutation doesn't affect this part according to predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#68223](https://github.com/ClickHouse/ClickHouse/issues/68223): Fixed a NULL pointer dereference, triggered by a specially crafted query, that crashed the server via hopEnd, hopStart, tumbleEnd, and tumbleStart. [#68098](https://github.com/ClickHouse/ClickHouse/pull/68098) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Backported in [#68175](https://github.com/ClickHouse/ClickHouse/issues/68175): Removes an incorrect optimization to remove sorting in subqueries that use `OFFSET`. Fixes [#67906](https://github.com/ClickHouse/ClickHouse/issues/67906). [#68099](https://github.com/ClickHouse/ClickHouse/pull/68099) ([Graham Campbell](https://github.com/GrahamCampbell)). +* Backported in [#68341](https://github.com/ClickHouse/ClickHouse/issues/68341): Try fix postgres crash when query is cancelled. [#68288](https://github.com/ClickHouse/ClickHouse/pull/68288) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#68398](https://github.com/ClickHouse/ClickHouse/issues/68398): Fix missing sync replica mode in query `SYSTEM SYNC REPLICA`. [#68326](https://github.com/ClickHouse/ClickHouse/pull/68326) ([Duc Canh Le](https://github.com/canhld94)). +* Backported in [#68669](https://github.com/ClickHouse/ClickHouse/issues/68669): Fix `LOGICAL_ERROR`s when functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. [#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Backported in [#67518](https://github.com/ClickHouse/ClickHouse/issues/67518): Split slow test 03036_dynamic_read_subcolumns. [#66954](https://github.com/ClickHouse/ClickHouse/pull/66954) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#67516](https://github.com/ClickHouse/ClickHouse/issues/67516): Split 01508_partition_pruning_long. [#66983](https://github.com/ClickHouse/ClickHouse/pull/66983) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#67529](https://github.com/ClickHouse/ClickHouse/issues/67529): Reduce max time of 00763_long_lock_buffer_alter_destination_table. [#67185](https://github.com/ClickHouse/ClickHouse/pull/67185) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#67803](https://github.com/ClickHouse/ClickHouse/issues/67803): Disable some Dynamic tests under sanitizers, rewrite 03202_dynamic_null_map_subcolumn to sql. [#67359](https://github.com/ClickHouse/ClickHouse/pull/67359) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#67643](https://github.com/ClickHouse/ClickHouse/issues/67643): [Green CI] Fix potentially flaky test_mask_sensitive_info integration test. [#67506](https://github.com/ClickHouse/ClickHouse/pull/67506) ([Alexey Katsman](https://github.com/alexkats)). +* Backported in [#67609](https://github.com/ClickHouse/ClickHouse/issues/67609): Fix test_zookeeper_config_load_balancing after adding the xdist worker name to the instance. [#67590](https://github.com/ClickHouse/ClickHouse/pull/67590) ([Pablo Marcos](https://github.com/pamarcos)). +* Backported in [#67871](https://github.com/ClickHouse/ClickHouse/issues/67871): Fix 02434_cancel_insert_when_client_dies. [#67600](https://github.com/ClickHouse/ClickHouse/pull/67600) ([vdimir](https://github.com/vdimir)). +* Backported in [#67704](https://github.com/ClickHouse/ClickHouse/issues/67704): Fix 02910_bad_logs_level_in_local in fast tests. [#67603](https://github.com/ClickHouse/ClickHouse/pull/67603) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#67689](https://github.com/ClickHouse/ClickHouse/issues/67689): Fix 01605_adaptive_granularity_block_borders. [#67605](https://github.com/ClickHouse/ClickHouse/pull/67605) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#67827](https://github.com/ClickHouse/ClickHouse/issues/67827): Try fix 03143_asof_join_ddb_long. [#67620](https://github.com/ClickHouse/ClickHouse/pull/67620) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#67892](https://github.com/ClickHouse/ClickHouse/issues/67892): Revert "Merge pull request [#66510](https://github.com/ClickHouse/ClickHouse/issues/66510) from canhld94/fix_trivial_count_non_deterministic_func". [#67800](https://github.com/ClickHouse/ClickHouse/pull/67800) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#68081](https://github.com/ClickHouse/ClickHouse/issues/68081): Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. [#67953](https://github.com/ClickHouse/ClickHouse/pull/67953) ([pufit](https://github.com/pufit)). +* Update version after release. [#68044](https://github.com/ClickHouse/ClickHouse/pull/68044) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Backported in [#68269](https://github.com/ClickHouse/ClickHouse/issues/68269): [Green CI] Fix test 01903_correct_block_size_prediction_with_default. [#68203](https://github.com/ClickHouse/ClickHouse/pull/68203) ([Pablo Marcos](https://github.com/pamarcos)). +* Backported in [#68432](https://github.com/ClickHouse/ClickHouse/issues/68432): tests: make 01600_parts_states_metrics_long better. [#68265](https://github.com/ClickHouse/ClickHouse/pull/68265) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#68538](https://github.com/ClickHouse/ClickHouse/issues/68538): CI: Native build for package_aarch64. [#68457](https://github.com/ClickHouse/ClickHouse/pull/68457) ([Max K.](https://github.com/maxknv)). +* Backported in [#68555](https://github.com/ClickHouse/ClickHouse/issues/68555): CI: Minor release workflow fix. [#68536](https://github.com/ClickHouse/ClickHouse/pull/68536) ([Max K.](https://github.com/maxknv)). + diff --git a/docs/changelogs/v24.7.4.51-stable.md b/docs/changelogs/v24.7.4.51-stable.md new file mode 100644 index 00000000000..a7cf9790383 --- /dev/null +++ b/docs/changelogs/v24.7.4.51-stable.md @@ -0,0 +1,36 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.7.4.51-stable (70fe2f6fa52) FIXME as compared to v24.7.3.42-stable (63730bc4293) + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Backported in [#68232](https://github.com/ClickHouse/ClickHouse/issues/68232): Fixed `Not-ready Set` in some system tables when filtering using subqueries. [#66018](https://github.com/ClickHouse/ClickHouse/pull/66018) ([Michael Kolupaev](https://github.com/al13n321)). +* Backported in [#68068](https://github.com/ClickHouse/ClickHouse/issues/68068): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)). +* Backported in [#68613](https://github.com/ClickHouse/ClickHouse/issues/68613): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#68278](https://github.com/ClickHouse/ClickHouse/issues/68278): Fix inserting into stream like engines (Kafka, RabbitMQ, NATS) through HTTP interface. [#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#68040](https://github.com/ClickHouse/ClickHouse/issues/68040): Fix creation of view with recursive CTE. [#67587](https://github.com/ClickHouse/ClickHouse/pull/67587) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Backported in [#68038](https://github.com/ClickHouse/ClickHouse/issues/68038): Fix crash on `percent_rank`. `percent_rank`'s default frame type is changed to `range unbounded preceding and unbounded following`. `IWindowFunction`'s default window frame is considered and now window functions without window frame definition in sql can be put into different `WindowTransfomer`s properly. [#67661](https://github.com/ClickHouse/ClickHouse/pull/67661) ([lgbo](https://github.com/lgbo-ustc)). +* Backported in [#68224](https://github.com/ClickHouse/ClickHouse/issues/68224): Fix wrong `count()` result when there is non-deterministic function in predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#68095](https://github.com/ClickHouse/ClickHouse/issues/68095): Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#68126](https://github.com/ClickHouse/ClickHouse/issues/68126): Fixed skipping of untouched parts in mutations with new analyzer. Previously with enabled analyzer data in part could be rewritten by mutation even if mutation doesn't affect this part according to predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#68223](https://github.com/ClickHouse/ClickHouse/issues/68223): Fixed a NULL pointer dereference, triggered by a specially crafted query, that crashed the server via hopEnd, hopStart, tumbleEnd, and tumbleStart. [#68098](https://github.com/ClickHouse/ClickHouse/pull/68098) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Backported in [#68175](https://github.com/ClickHouse/ClickHouse/issues/68175): Removes an incorrect optimization to remove sorting in subqueries that use `OFFSET`. Fixes [#67906](https://github.com/ClickHouse/ClickHouse/issues/67906). [#68099](https://github.com/ClickHouse/ClickHouse/pull/68099) ([Graham Campbell](https://github.com/GrahamCampbell)). +* Backported in [#68341](https://github.com/ClickHouse/ClickHouse/issues/68341): Try fix postgres crash when query is cancelled. [#68288](https://github.com/ClickHouse/ClickHouse/pull/68288) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#68398](https://github.com/ClickHouse/ClickHouse/issues/68398): Fix missing sync replica mode in query `SYSTEM SYNC REPLICA`. [#68326](https://github.com/ClickHouse/ClickHouse/pull/68326) ([Duc Canh Le](https://github.com/canhld94)). +* Backported in [#68669](https://github.com/ClickHouse/ClickHouse/issues/68669): Fix `LOGICAL_ERROR`s when functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. [#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Backported in [#67803](https://github.com/ClickHouse/ClickHouse/issues/67803): Disable some Dynamic tests under sanitizers, rewrite 03202_dynamic_null_map_subcolumn to sql. [#67359](https://github.com/ClickHouse/ClickHouse/pull/67359) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#68081](https://github.com/ClickHouse/ClickHouse/issues/68081): Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. [#67953](https://github.com/ClickHouse/ClickHouse/pull/67953) ([pufit](https://github.com/pufit)). +* Update version after release. [#68044](https://github.com/ClickHouse/ClickHouse/pull/68044) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Backported in [#68269](https://github.com/ClickHouse/ClickHouse/issues/68269): [Green CI] Fix test 01903_correct_block_size_prediction_with_default. [#68203](https://github.com/ClickHouse/ClickHouse/pull/68203) ([Pablo Marcos](https://github.com/pamarcos)). +* Backported in [#68432](https://github.com/ClickHouse/ClickHouse/issues/68432): tests: make 01600_parts_states_metrics_long better. [#68265](https://github.com/ClickHouse/ClickHouse/pull/68265) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#68538](https://github.com/ClickHouse/ClickHouse/issues/68538): CI: Native build for package_aarch64. [#68457](https://github.com/ClickHouse/ClickHouse/pull/68457) ([Max K.](https://github.com/maxknv)). +* Backported in [#68555](https://github.com/ClickHouse/ClickHouse/issues/68555): CI: Minor release workflow fix. [#68536](https://github.com/ClickHouse/ClickHouse/pull/68536) ([Max K.](https://github.com/maxknv)). +* Backported in [#68760](https://github.com/ClickHouse/ClickHouse/issues/68760): To make patch release possible from every commit on release branch, package_debug build is required and must not be skipped. [#68750](https://github.com/ClickHouse/ClickHouse/pull/68750) ([Max K.](https://github.com/maxknv)). + diff --git a/docs/changelogs/v24.8.1.2684-lts.md b/docs/changelogs/v24.8.1.2684-lts.md new file mode 100644 index 00000000000..8171bb3d719 --- /dev/null +++ b/docs/changelogs/v24.8.1.2684-lts.md @@ -0,0 +1,525 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.8.1.2684-lts (161c62fd295) FIXME as compared to v24.8.1.1-new (3f8b27d7acc) + +#### Backward Incompatible Change +* `clickhouse-client` and `clickhouse-local` now default to multi-query mode (instead single-query mode). As an example, `clickhouse-client -q "SELECT 1; SELECT 2"` now works, whereas users previously had to add `--multiquery` (or `-n`). The `--multiquery/-n` switch became obsolete. INSERT queries in multi-query statements are treated specially based on their FORMAT clause: If the FORMAT is `VALUES` (the most common case), the end of the INSERT statement is represented by a trailing semicolon `;` at the end of the query. For all other FORMATs (e.g. `CSV` or `JSONEachRow`), the end of the INSERT statement is represented by two newlines `\n\n` at the end of the query. [#63898](https://github.com/ClickHouse/ClickHouse/pull/63898) ([FFish](https://github.com/wxybear)). +* In previous versions, it was possible to use an alternative syntax for `LowCardinality` data types by appending `WithDictionary` to the name of the data type. It was an initial working implementation, and it was never documented or exposed to the public. Now, it is deprecated. If you have used this syntax, you have to ALTER your tables and rename the data types to `LowCardinality`. [#66842](https://github.com/ClickHouse/ClickHouse/pull/66842) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix logical errors with storage `Buffer` used with distributed destination table. It's a backward incompatible change: queries using `Buffer` with a distributed destination table may stop working if the table appears more than once in the query (e.g., in a self-join). [#67015](https://github.com/ClickHouse/ClickHouse/pull/67015) ([vdimir](https://github.com/vdimir)). +* In previous versions, calling functions for random distributions based on the Gamma function (such as Chi-Squared, Student, Fisher) with negative arguments close to zero led to a long computation or an infinite loop. In the new version, calling these functions with zero or negative arguments will produce an exception. This closes [#67297](https://github.com/ClickHouse/ClickHouse/issues/67297). [#67326](https://github.com/ClickHouse/ClickHouse/pull/67326) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The system table `text_log` is enabled by default. This is fully compatible with previous versions, but you may notice subtly increased disk usage on the local disk (this system table takes a tiny amount of disk space). [#67428](https://github.com/ClickHouse/ClickHouse/pull/67428) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* In previous versions, `arrayWithConstant` can be slow if asked to generate very large arrays. In the new version, it is limited to 1 GB per array. This closes [#32754](https://github.com/ClickHouse/ClickHouse/issues/32754). [#67741](https://github.com/ClickHouse/ClickHouse/pull/67741) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix REPLACE modifier formatting (forbid omitting brackets). [#67774](https://github.com/ClickHouse/ClickHouse/pull/67774) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#68349](https://github.com/ClickHouse/ClickHouse/issues/68349): Reimplement Dynamic type. Now when the limit of dynamic data types is reached new types are not casted to String but stored in a special data structure in binary format with binary encoded data type. Now any type ever inserted into Dynamic column can be read from it as subcolumn. [#68132](https://github.com/ClickHouse/ClickHouse/pull/68132) ([Kruglov Pavel](https://github.com/Avogar)). + +#### New Feature +* Add new experimental Kafka storage engine to store offsets in Keeper instead of relying on committing them to Kafka. [#57625](https://github.com/ClickHouse/ClickHouse/pull/57625) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Add new TimeSeries table engine: - by default: ``` CREATE TABLE tbl ENGINE=TimeSeries ``` - or with specifying engines of its internal tables:. [#64183](https://github.com/ClickHouse/ClickHouse/pull/64183) ([Vitaly Baranov](https://github.com/vitlibar)). +* Support more join strictnesses (`LEFT/RIGHT SEMI/ANTI/ANY JOIN`) with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y (see setting `allow_experimental_join_condition`). [#64281](https://github.com/ClickHouse/ClickHouse/pull/64281) ([lgbo](https://github.com/lgbo-ustc)). +* Add `_etag` virtual column for S3 table engine. Fixes [#65312](https://github.com/ClickHouse/ClickHouse/issues/65312). [#65386](https://github.com/ClickHouse/ClickHouse/pull/65386) ([skyoct](https://github.com/skyoct)). +* This pull request introduces Hive-style partitioning for different engines (`File`, `URL`, `S3`, `AzureBlobStorage`, `HDFS`). Hive-style partitioning organizes data into partitioned sub-directories, making it efficient to query and manage large datasets. Currently, it only creates virtual columns with the appropriate name and data. The follow-up PR will introduce the appropriate data filtering (performance speedup). [#65997](https://github.com/ClickHouse/ClickHouse/pull/65997) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Add function printf for spark compatiability. [#66257](https://github.com/ClickHouse/ClickHouse/pull/66257) ([李扬](https://github.com/taiyang-li)). +* Backported in [#68450](https://github.com/ClickHouse/ClickHouse/issues/68450): Implement new JSON data type. [#66444](https://github.com/ClickHouse/ClickHouse/pull/66444) ([Kruglov Pavel](https://github.com/Avogar)). +* Add a new server setting: disable_insertion_and_mutation Set it to true. This node will deny all insertions and mutations(Alter table delete/update/drop partition). Include async insertion. [#66519](https://github.com/ClickHouse/ClickHouse/pull/66519) ([Xu Jia](https://github.com/XuJia0210)). +* Add options `restore_replace_external_engines_to_null` and `restore_replace_external_table_functions_to_null` to replace external engines and table_engines to Null engine that can be useful for testing. It should work for RESTORE and explicit table creation. [#66536](https://github.com/ClickHouse/ClickHouse/pull/66536) ([Ilya Yatsishin](https://github.com/qoega)). +* Added support for reading MULTILINESTRING geometry in WKT format using function readWKTLineString. [#67647](https://github.com/ClickHouse/ClickHouse/pull/67647) ([Jacob Reckhard](https://github.com/jacobrec)). +* Add a new table function `fuzzQuery`. This function allows the modification of a given query string with random variations. Example: `SELECT query FROM fuzzQuery('SELECT 1') LIMIT 5;`. [#67655](https://github.com/ClickHouse/ClickHouse/pull/67655) ([pufit](https://github.com/pufit)). +* Support query `DROP DETACHED PARTITION ALL` to drop all detached partitions. [#67885](https://github.com/ClickHouse/ClickHouse/pull/67885) ([Duc Canh Le](https://github.com/canhld94)). +* Added a tagging (namespace) mechanism for the query cache. The same queries with different tags are considered different by the query cache. Example: `SELECT 1 SETTINGS use_query_cache = 1, query_cache_tag = 'abc'` and `SELECT 1 SETTINGS use_query_cache = 1, query_cache_tag = 'def'` now create different query cache entries. [#68235](https://github.com/ClickHouse/ClickHouse/pull/68235) ([sakulali](https://github.com/sakulali)). + +#### Performance Improvement +* Use adaptive read task size calculation method (adaptive meaning it depends on read column sizes) for parallel replicas. [#60377](https://github.com/ClickHouse/ClickHouse/pull/60377) ([Nikita Taranov](https://github.com/nickitat)). +* Store the `plain_rewritable` disk directory metadata in `__meta` layout, separately from the merge tree data in the object storage. Move the `plain_rewritable` disk to a flat directory structure. [#65751](https://github.com/ClickHouse/ClickHouse/pull/65751) ([Julia Kartseva](https://github.com/jkartseva)). +* Enable `compile_expressions` (JIT compiler for fragments of ordinary expressions) by default. This closes [#51264](https://github.com/ClickHouse/ClickHouse/issues/51264) and [#56386](https://github.com/ClickHouse/ClickHouse/issues/56386). [#66486](https://github.com/ClickHouse/ClickHouse/pull/66486) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve columns squashing for String/Array/Map/Variant/Dynamic types by reserving required memory in advance for all subcolumns. [#67043](https://github.com/ClickHouse/ClickHouse/pull/67043) ([Kruglov Pavel](https://github.com/Avogar)). +* Speed up system flush logs, flush logs on shutdown. [#67472](https://github.com/ClickHouse/ClickHouse/pull/67472) ([Sema Checherinda](https://github.com/CheSema)). +* Backported in [#68496](https://github.com/ClickHouse/ClickHouse/issues/68496): Improved overall performance of merges by reducing the overhead of scheduling steps of merges. [#68016](https://github.com/ClickHouse/ClickHouse/pull/68016) ([Anton Popov](https://github.com/CurtizJ)). +* Setting `optimize_functions_to_subcolumns` is enabled by default. [#68053](https://github.com/ClickHouse/ClickHouse/pull/68053) ([Anton Popov](https://github.com/CurtizJ)). + +#### Improvement +* ClickHouse server now supports new setting `max_keep_alive_requests`. For keep-alive HTTP connections to the server it works in tandem with `keep_alive_timeout` - if idle timeout not expired but there already more than `max_keep_alive_requests` requests done through the given connection - it will be closed by the server. [#61793](https://github.com/ClickHouse/ClickHouse/pull/61793) ([Nikita Taranov](https://github.com/nickitat)). +* As in the new version, SOURCES are checked based on Table Engine logic, even grant table engine is disabled by default, if a source is not granted, a prompt of table engine would popup instead, which is misleading. [#65419](https://github.com/ClickHouse/ClickHouse/pull/65419) ([jsc0218](https://github.com/jsc0218)). +* Added statistics type `count_min` (count-min sketches) which provide selectivity estimations for equality predicates like `col = 'val'`. Supported data types are string, date, datatime and numeric types. [#65521](https://github.com/ClickHouse/ClickHouse/pull/65521) ([JackyWoo](https://github.com/JackyWoo)). +* Do not pass logs for keeper explicitly in the image to allow overriding. [#65564](https://github.com/ClickHouse/ClickHouse/pull/65564) ([Azat Khuzhin](https://github.com/azat)). +* Use `Atomic` database by default in `clickhouse-local`. Address items 1 and 5 from [#50647](https://github.com/ClickHouse/ClickHouse/issues/50647). Closes [#44817](https://github.com/ClickHouse/ClickHouse/issues/44817). [#65860](https://github.com/ClickHouse/ClickHouse/pull/65860) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add the `rows_before_aggregation_at_least` statistic to the query response when `rows_before_aggregation` is enabled. This statistic represents the number of rows read before aggregation. In the context of a distributed query, when using the `group by` or `max` aggregation function without a `limit`, `rows_before_aggregation_at_least` can reflect the number of rows hit by the query. [#66084](https://github.com/ClickHouse/ClickHouse/pull/66084) ([morning-color](https://github.com/morning-color)). +* Introduced `use_same_password_for_base_backup` settings for `BACKUP` and `RESTORE` queries, allowing to create and restore incremental backups to/from password protected archives. [#66214](https://github.com/ClickHouse/ClickHouse/pull/66214) ([Samuele](https://github.com/sguerrini97)). +* Ignore async_load_databases for ATTACH query (previously it was possible for ATTACH to return before the tables had been attached). [#66240](https://github.com/ClickHouse/ClickHouse/pull/66240) ([Azat Khuzhin](https://github.com/azat)). +* [Replicated]MergeTreeSink has to properly cancel its delayed_chunk on `onCancel()` method. [#66279](https://github.com/ClickHouse/ClickHouse/pull/66279) ([Sema Checherinda](https://github.com/CheSema)). +* Added logs and metrics for rejected connections (where there are not enough resources). [#66410](https://github.com/ClickHouse/ClickHouse/pull/66410) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Setting `allow_experimental_analyzer` is renamed to `enable_analyzer`. The old name is preserved in a form of an alias. [#66438](https://github.com/ClickHouse/ClickHouse/pull/66438) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Support true UUID type for MongoDB engine. [#66671](https://github.com/ClickHouse/ClickHouse/pull/66671) ([Azat Khuzhin](https://github.com/azat)). +* Added a new `MergeTree` setting `deduplicate_merge_projection_mode` to control the projections during merges (for specific engines) and `OPTIMIZE DEDUPLICATE` query. Supported options: `throw` (throw an exception in case the projection is not fully supported for *MergeTree engine), `drop` (remove projection during merge if it can't be merged itself consistently) and `rebuild` (rebuild projection from scratch, which is a heavy operation). [#66672](https://github.com/ClickHouse/ClickHouse/pull/66672) ([jsc0218](https://github.com/jsc0218)). +* Add replication lag and recovery time metrics. [#66703](https://github.com/ClickHouse/ClickHouse/pull/66703) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)). +* Add S3DiskNoKeyErrors metric. [#66704](https://github.com/ClickHouse/ClickHouse/pull/66704) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)). +* Ensure COMMENT clause works for all table engines. [#66832](https://github.com/ClickHouse/ClickHouse/pull/66832) ([Joe Lynch](https://github.com/joelynch)). +* Update the usage of error code `BAD_ARGUMENTS` and `ILLEGAL_TYPE_OF_ARGUMENT` by more accurate error codes when appropriate. [#66851](https://github.com/ClickHouse/ClickHouse/pull/66851) ([Yohann Jardin](https://github.com/yohannj)). +* Function `mapFromArrays` now accepts `Map(K, V)` as first argument, for example: `SELECT mapFromArrays(map('a', 4, 'b', 4), ['aa', 'bb'])` now works and returns `{('a',4):'aa',('b',4):'bb'}`. Also, if the 1st argument is an Array, it can now also be of type `Array(Nullable(T))` or `Array(LowCardinality(Nullable(T)))` as long as the actual array values are not `NULL`. [#67103](https://github.com/ClickHouse/ClickHouse/pull/67103) ([李扬](https://github.com/taiyang-li)). +* Read configuration for clickhouse-local from ~/.clickhouse-local. [#67135](https://github.com/ClickHouse/ClickHouse/pull/67135) ([Azat Khuzhin](https://github.com/azat)). +* Rename setting `input_format_orc_read_use_writer_time_zone` to `input_format_orc_reader_timezone` and allow the user to set the reader timezone. [#67175](https://github.com/ClickHouse/ClickHouse/pull/67175) ([kevinyhzou](https://github.com/KevinyhZou)). +* Decrease level of 'Socket is not connected' error when HTTP connection immediately reset by peer after connecting, close [#34218](https://github.com/ClickHouse/ClickHouse/issues/34218). [#67177](https://github.com/ClickHouse/ClickHouse/pull/67177) ([vdimir](https://github.com/vdimir)). +* Speed up tables removal for `DROP DATABASE` query, increased the default value for `database_catalog_drop_table_concurrency` to 16. [#67228](https://github.com/ClickHouse/ClickHouse/pull/67228) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Add ability to load dashboards for system.dashboards from config (once set, they overrides the default dashboards preset). [#67232](https://github.com/ClickHouse/ClickHouse/pull/67232) ([Azat Khuzhin](https://github.com/azat)). +* The window functions in SQL are traditionally in snake case. ClickHouse uses camelCase, so new aliases `denseRank()` and `percentRank()` have been created. These new functions can be called the exact same as the original `dense_rank()` and `percent_rank()` functions. Both snake case and camelCase syntaxes remain usable. A new test for each of the functions has been added as well. This closes [#67042](https://github.com/ClickHouse/ClickHouse/issues/67042) . [#67334](https://github.com/ClickHouse/ClickHouse/pull/67334) ([Peter Nguyen](https://github.com/petern48)). +* Autodetect configuration file format if is not .xml, .yml or .yaml. If the file begins with < it might be XML, otherwise it might be YAML. Non regular file just parse as XML such as PIPE: /dev/fd/X. [#67391](https://github.com/ClickHouse/ClickHouse/pull/67391) ([sakulali](https://github.com/sakulali)). +* Functions `formatDateTime` and `formatDateTimeInJodaSyntax` now treat their format parameter as optional. If it is not specified, format strings `%Y-%m-%d %H:%i:%s` and `yyyy-MM-dd HH:mm:ss` are assumed. Example: `SELECT parseDateTime('2021-01-04 23:12:34')` now returns DateTime value `2021-01-04 23:12:34` (previously, this threw an exception). [#67399](https://github.com/ClickHouse/ClickHouse/pull/67399) ([Robert Schulze](https://github.com/rschu1ze)). +* Automatically retry Keeper requests in KeeperMap if they happen because of timeout or connection loss. [#67448](https://github.com/ClickHouse/ClickHouse/pull/67448) ([Antonio Andelic](https://github.com/antonio2368)). +* Rework usage of custom table's disks. [#67684](https://github.com/ClickHouse/ClickHouse/pull/67684) ([Sema Checherinda](https://github.com/CheSema)). +* Various improvements in the advanced dashboard. This closes [#67697](https://github.com/ClickHouse/ClickHouse/issues/67697). This closes [#63407](https://github.com/ClickHouse/ClickHouse/issues/63407). This closes [#51129](https://github.com/ClickHouse/ClickHouse/issues/51129). This closes [#61204](https://github.com/ClickHouse/ClickHouse/issues/61204). [#67701](https://github.com/ClickHouse/ClickHouse/pull/67701) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Avoid allocate too much capacity for array column while writing orc & some minor refactors to make code cleaner. Performance speeds up 15% for array column. [#67879](https://github.com/ClickHouse/ClickHouse/pull/67879) ([李扬](https://github.com/taiyang-li)). +* Support OPTIMIZE query on Join table engine to reduce Join tables memory footprint. [#67883](https://github.com/ClickHouse/ClickHouse/pull/67883) ([Duc Canh Le](https://github.com/canhld94)). +* Add replication lag and recovery time metrics. [#67913](https://github.com/ClickHouse/ClickHouse/pull/67913) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)). +* Add '-no-pie' to aarch64 Linux builds to allow proper introspection and symbolizing of stacktraces after a ClickHouse restart. [#67916](https://github.com/ClickHouse/ClickHouse/pull/67916) ([filimonov](https://github.com/filimonov)). +* Backported in [#68481](https://github.com/ClickHouse/ClickHouse/issues/68481): Added profile events for merges and mutations for better introspection. [#68015](https://github.com/ClickHouse/ClickHouse/pull/68015) ([Anton Popov](https://github.com/CurtizJ)). +* Fix settings/current_database in system.processes for async BACKUP/RESTORE. [#68163](https://github.com/ClickHouse/ClickHouse/pull/68163) ([Azat Khuzhin](https://github.com/azat)). +* Remove unnecessary logs for MergeTree that doesn't support replication. [#68238](https://github.com/ClickHouse/ClickHouse/pull/68238) ([Daniil Ivanik](https://github.com/divanik)). +* Backported in [#68430](https://github.com/ClickHouse/ClickHouse/issues/68430): Improve schema inference of date times. Now DateTime64 used only when date time has fractional part, otherwise regular DateTime is used. Inference of Date/DateTime is more strict now, especially when `date_time_input_format='best_effort'` to avoid inferring date times from strings in corner cases. [#68382](https://github.com/ClickHouse/ClickHouse/pull/68382) ([Kruglov Pavel](https://github.com/Avogar)). + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Fixed `Not-ready Set` in some system tables when filtering using subqueries. [#66018](https://github.com/ClickHouse/ClickHouse/pull/66018) ([Michael Kolupaev](https://github.com/al13n321)). +* Fixed reading of subcolumns after `ALTER ADD COLUMN` query. [#66243](https://github.com/ClickHouse/ClickHouse/pull/66243) ([Anton Popov](https://github.com/CurtizJ)). +* Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)). +* Fix formatting of query with aliased JOIN ON expression, e.g. `... JOIN t2 ON (x = y) AS e ORDER BY x` should be formatted as `... JOIN t2 ON ((x = y) AS e) ORDER BY x`. [#66312](https://github.com/ClickHouse/ClickHouse/pull/66312) ([vdimir](https://github.com/vdimir)). +* Fix cluster() for inter-server secret (preserve initial user as before). [#66364](https://github.com/ClickHouse/ClickHouse/pull/66364) ([Azat Khuzhin](https://github.com/azat)). +* Fix possible runtime error while converting Array field with nulls to Array(Variant). [#66727](https://github.com/ClickHouse/ClickHouse/pull/66727) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix for occasional deadlock in Context::getDDLWorker. [#66843](https://github.com/ClickHouse/ClickHouse/pull/66843) ([Alexander Gololobov](https://github.com/davenger)). +* Fix creating KeeperMap table after an incomplete drop. [#66865](https://github.com/ClickHouse/ClickHouse/pull/66865) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix broken part error while restoring to a `s3_plain_rewritable` disk. [#66881](https://github.com/ClickHouse/ClickHouse/pull/66881) ([Vitaly Baranov](https://github.com/vitlibar)). +* In rare cases ClickHouse could consider parts as broken because of some unexpected projections on disk. Now it's fixed. [#66898](https://github.com/ClickHouse/ClickHouse/pull/66898) ([alesapin](https://github.com/alesapin)). +* Fix invalid format detection in schema inference that could lead to logical error Format {} doesn't support schema inference. [#66899](https://github.com/ClickHouse/ClickHouse/pull/66899) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix possible deadlock on query cancel with parallel replicas. [#66905](https://github.com/ClickHouse/ClickHouse/pull/66905) ([Nikita Taranov](https://github.com/nickitat)). +* Forbid create as select even when database_replicated_allow_heavy_create is set. It was unconditionally forbidden in 23.12 and accidentally allowed under the setting in unreleased 24.7. [#66980](https://github.com/ClickHouse/ClickHouse/pull/66980) ([vdimir](https://github.com/vdimir)). +* Reading from the `numbers` could wrongly throw an exception when the `max_rows_to_read` limit was set. This closes [#66992](https://github.com/ClickHouse/ClickHouse/issues/66992). [#66996](https://github.com/ClickHouse/ClickHouse/pull/66996) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add proper type conversion to lagInFrame and leadInFrame window functions - fixes msan test. [#67091](https://github.com/ClickHouse/ClickHouse/pull/67091) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Only relevant to the experimental Variant data type. Fix crash with Variant + AggregateFunction type. [#67122](https://github.com/ClickHouse/ClickHouse/pull/67122) ([Kruglov Pavel](https://github.com/Avogar)). +* TRUNCATE DATABASE used to stop replication as if it was a DROP DATABASE query, it's fixed. [#67129](https://github.com/ClickHouse/ClickHouse/pull/67129) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Use a separate client context in `clickhouse-local`. [#67133](https://github.com/ClickHouse/ClickHouse/pull/67133) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix error `Cannot convert column because it is non constant in source stream but must be constant in result.` for a query that reads from the `Merge` table over the `Distriburted` table with one shard. [#67146](https://github.com/ClickHouse/ClickHouse/pull/67146) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Correct behavior of `ORDER BY all` with disabled `enable_order_by_all` and parallel replicas (distributed queries as well). [#67153](https://github.com/ClickHouse/ClickHouse/pull/67153) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix wrong usage of input_format_max_bytes_to_read_for_schema_inference in schema cache. [#67157](https://github.com/ClickHouse/ClickHouse/pull/67157) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix the memory leak for count distinct, when exception issued during group by single nullable key. [#67171](https://github.com/ClickHouse/ClickHouse/pull/67171) ([Jet He](https://github.com/compasses)). +* This closes [#67156](https://github.com/ClickHouse/ClickHouse/issues/67156). This closes [#66447](https://github.com/ClickHouse/ClickHouse/issues/66447). The bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/62907. [#67178](https://github.com/ClickHouse/ClickHouse/pull/67178) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix crash in DistributedAsyncInsert when connection is empty. [#67219](https://github.com/ClickHouse/ClickHouse/pull/67219) ([Pablo Marcos](https://github.com/pamarcos)). +* Fix error `Conversion from AggregateFunction(name, Type) to AggregateFunction(name, Nullable(Type)) is not supported`. The bug was caused by the `optimize_rewrite_aggregate_function_with_if` optimization. Fixes [#67112](https://github.com/ClickHouse/ClickHouse/issues/67112). [#67229](https://github.com/ClickHouse/ClickHouse/pull/67229) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix hung query when using empty tuple as lhs of function IN. [#67295](https://github.com/ClickHouse/ClickHouse/pull/67295) ([Duc Canh Le](https://github.com/canhld94)). +* Fix crash of `uniq` and `uniqTheta ` with `tuple()` argument. Closes [#67303](https://github.com/ClickHouse/ClickHouse/issues/67303). [#67306](https://github.com/ClickHouse/ClickHouse/pull/67306) ([flynn](https://github.com/ucasfl)). +* It was possible to create a very deep nested JSON data that triggered stack overflow while skipping unknown fields. This closes [#67292](https://github.com/ClickHouse/ClickHouse/issues/67292). [#67324](https://github.com/ClickHouse/ClickHouse/pull/67324) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix attaching ReplicatedMergeTree table after exception during startup. [#67360](https://github.com/ClickHouse/ClickHouse/pull/67360) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix segfault caused by incorrectly detaching from thread group in `Aggregator`. [#67385](https://github.com/ClickHouse/ClickHouse/pull/67385) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix one more case when a non-deterministic function is specified in PK. [#67395](https://github.com/ClickHouse/ClickHouse/pull/67395) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed `bloom_filter` index breaking queries with mildly weird conditions like `(k=2)=(k=2)` or `has([1,2,3], k)`. [#67423](https://github.com/ClickHouse/ClickHouse/pull/67423) ([Michael Kolupaev](https://github.com/al13n321)). +* Correctly parse file name/URI containing `::` if it's not an archive. [#67433](https://github.com/ClickHouse/ClickHouse/pull/67433) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix wait for tasks in ~WriteBufferFromS3 in case WriteBuffer was cancelled. [#67459](https://github.com/ClickHouse/ClickHouse/pull/67459) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Protect temporary part directories from removing during RESTORE. [#67491](https://github.com/ClickHouse/ClickHouse/pull/67491) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix execution of nested short-circuit functions. [#67520](https://github.com/ClickHouse/ClickHouse/pull/67520) ([Kruglov Pavel](https://github.com/Avogar)). +* Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)). +* Fix `Logical error: Expected the argument №N of type T to have X rows, but it has 0`. The error could happen in a remote query with constant expression in `GROUP BY` (with a new analyzer). [#67536](https://github.com/ClickHouse/ClickHouse/pull/67536) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix join on tuple with NULLs: Some queries with the new analyzer and `NULL` inside the tuple in the `JOIN ON` section returned incorrect results. [#67538](https://github.com/ClickHouse/ClickHouse/pull/67538) ([vdimir](https://github.com/vdimir)). +* Fix redundant reschedule of FileCache::freeSpaceRatioKeepingThreadFunc() in case of full non-evictable cache. [#67540](https://github.com/ClickHouse/ClickHouse/pull/67540) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix inserting into stream like engines (Kafka, RabbitMQ, NATS) through HTTP interface. [#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fix for function `toStartOfWeek` which returned the wrong result with a small `DateTime64` value. [#67558](https://github.com/ClickHouse/ClickHouse/pull/67558) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Fix creation of view with recursive CTE. [#67587](https://github.com/ClickHouse/ClickHouse/pull/67587) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix `Logical error: 'file_offset_of_buffer_end <= read_until_position'` in filesystem cache. Closes [#57508](https://github.com/ClickHouse/ClickHouse/issues/57508). [#67623](https://github.com/ClickHouse/ClickHouse/pull/67623) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fixes [#62282](https://github.com/ClickHouse/ClickHouse/issues/62282). Removed the call to `convertFieldToString()` and added datatype specific serialization code. Parameterized view substitution was broken for multiple datatypes when parameter value was a function or expression returning datatype instance. [#67654](https://github.com/ClickHouse/ClickHouse/pull/67654) ([Shankar](https://github.com/shiyer7474)). +* Fix crash on `percent_rank`. `percent_rank`'s default frame type is changed to `range unbounded preceding and unbounded following`. `IWindowFunction`'s default window frame is considered and now window functions without window frame definition in sql can be put into different `WindowTransfomer`s properly. [#67661](https://github.com/ClickHouse/ClickHouse/pull/67661) ([lgbo](https://github.com/lgbo-ustc)). +* Fix reloading SQL UDFs with UNION. Previously, restarting the server could make UDF invalid. [#67665](https://github.com/ClickHouse/ClickHouse/pull/67665) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix possible logical error "Unexpected return type from if" with experimental Variant type and enabled setting `use_variant_as_common_type ` in function if with Tuples and Maps. [#67687](https://github.com/ClickHouse/ClickHouse/pull/67687) ([Kruglov Pavel](https://github.com/Avogar)). +* Due to a bug in Linux Kernel, a query can hung in `TimerDescriptor::drain`. This closes [#37686](https://github.com/ClickHouse/ClickHouse/issues/37686). [#67702](https://github.com/ClickHouse/ClickHouse/pull/67702) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix completion of `RESTORE ON CLUSTER` command. [#67720](https://github.com/ClickHouse/ClickHouse/pull/67720) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix dictionary hang in case of CANNOT_SCHEDULE_TASK while loading. [#67751](https://github.com/ClickHouse/ClickHouse/pull/67751) ([Azat Khuzhin](https://github.com/azat)). +* Fix potential stack overflow in `JSONMergePatch` function. Renamed this function from `jsonMergePatch` to `JSONMergePatch` because the previous name was wrong. The previous name is still kept for compatibility. Improved diagnostic of errors in the function. This closes [#67304](https://github.com/ClickHouse/ClickHouse/issues/67304). [#67756](https://github.com/ClickHouse/ClickHouse/pull/67756) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Queries like `SELECT count() FROM t WHERE cast(c = 1 or c = 9999 AS Bool) SETTINGS use_skip_indexes=1` with bloom filter indexes on `c` now work correctly. [#67781](https://github.com/ClickHouse/ClickHouse/pull/67781) ([jsc0218](https://github.com/jsc0218)). +* Fix wrong aggregation result in some queries with aggregation without keys and filter, close [#67419](https://github.com/ClickHouse/ClickHouse/issues/67419). [#67804](https://github.com/ClickHouse/ClickHouse/pull/67804) ([vdimir](https://github.com/vdimir)). +* Validate experimental/suspicious data types in ALTER ADD/MODIFY COLUMN. [#67911](https://github.com/ClickHouse/ClickHouse/pull/67911) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix DateTime64 parsing after constant folding in distributed queries, close [#66773](https://github.com/ClickHouse/ClickHouse/issues/66773). [#67920](https://github.com/ClickHouse/ClickHouse/pull/67920) ([vdimir](https://github.com/vdimir)). +* Fix wrong `count()` result when there is non-deterministic function in predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)). +* Now ClickHouse doesn't consider part as broken if projection doesn't exist on disk but exists in `checksums.txt`. [#68003](https://github.com/ClickHouse/ClickHouse/pull/68003) ([alesapin](https://github.com/alesapin)). +* Fixed skipping of untouched parts in mutations with new analyzer. Previously with enabled analyzer data in part could be rewritten by mutation even if mutation doesn't affect this part according to predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)). +* Fixed a NULL pointer dereference, triggered by a specially crafted query, that crashed the server via hopEnd, hopStart, tumbleEnd, and tumbleStart. [#68098](https://github.com/ClickHouse/ClickHouse/pull/68098) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Removes an incorrect optimization to remove sorting in subqueries that use `OFFSET`. Fixes [#67906](https://github.com/ClickHouse/ClickHouse/issues/67906). [#68099](https://github.com/ClickHouse/ClickHouse/pull/68099) ([Graham Campbell](https://github.com/GrahamCampbell)). +* Attempt to fix `Block structure mismatch in AggregatingStep stream: different types` for aggregate projection optimization. [#68107](https://github.com/ClickHouse/ClickHouse/pull/68107) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#68343](https://github.com/ClickHouse/ClickHouse/issues/68343): Try fix postgres crash when query is cancelled. [#68288](https://github.com/ClickHouse/ClickHouse/pull/68288) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#68400](https://github.com/ClickHouse/ClickHouse/issues/68400): Fix missing sync replica mode in query `SYSTEM SYNC REPLICA`. [#68326](https://github.com/ClickHouse/ClickHouse/pull/68326) ([Duc Canh Le](https://github.com/canhld94)). + +#### Build/Testing/Packaging Improvement +* Improved `test_storage_s3` tests: increased `s3_max_single_read_retries` for read from "unstable" s3 source and allowed all tests to run multiple times in a row. [#66896](https://github.com/ClickHouse/ClickHouse/pull/66896) ([Ilya Yatsishin](https://github.com/qoega)). +* Integration tests flaky check will not run each test case multiple times to find more issues in tests and make them more reliable. It is using `pytest-repeat` library to run test case multiple times for the same environment. It is important to cleanup tables and other entities in the end of a test case to pass. Repeat works much faster than several pytest runs as it starts necessary containers only once. [#66986](https://github.com/ClickHouse/ClickHouse/pull/66986) ([Ilya Yatsishin](https://github.com/qoega)). +* Allow to use CLion with ClickHouse. In previous versions, CLion freezed for a minute on every keypress. This closes [#66994](https://github.com/ClickHouse/ClickHouse/issues/66994). [#66995](https://github.com/ClickHouse/ClickHouse/pull/66995) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Getauxval: avoid crash under sanitizer re-exec due to high aslr entropy. [#67081](https://github.com/ClickHouse/ClickHouse/pull/67081) ([Raúl Marín](https://github.com/Algunenano)). +* Some parts of client code are extracted to a single file and highest possible level optimization is applied to them even for debug builds. This closes: [#65745](https://github.com/ClickHouse/ClickHouse/issues/65745). [#67215](https://github.com/ClickHouse/ClickHouse/pull/67215) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). + +#### NO CL CATEGORY + +* Backported in [#68416](https://github.com/ClickHouse/ClickHouse/issues/68416):. [#68386](https://github.com/ClickHouse/ClickHouse/pull/68386) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Fix for 992 and friends"'. [#66993](https://github.com/ClickHouse/ClickHouse/pull/66993) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Revert "Fix for 992 and friends""'. [#67029](https://github.com/ClickHouse/ClickHouse/pull/67029) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "FuzzQuery table function"'. [#67040](https://github.com/ClickHouse/ClickHouse/pull/67040) ([Raúl Marín](https://github.com/Algunenano)). +* NO CL ENTRY: 'Revert "Enable `compile_expressions` by default."'. [#67299](https://github.com/ClickHouse/ClickHouse/pull/67299) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Slightly better calculation of primary index"'. [#67392](https://github.com/ClickHouse/ClickHouse/pull/67392) ([alesapin](https://github.com/alesapin)). +* NO CL ENTRY: 'Revert "Add settings to replace external engines to Null during create"'. [#67507](https://github.com/ClickHouse/ClickHouse/pull/67507) ([Raúl Marín](https://github.com/Algunenano)). +* NO CL ENTRY: 'Revert "Revert "Add settings to replace external engines to Null during create""'. [#67511](https://github.com/ClickHouse/ClickHouse/pull/67511) ([Ilya Yatsishin](https://github.com/qoega)). +* NO CL ENTRY: 'Revert "Add replication lag and recovery time metrics"'. [#67731](https://github.com/ClickHouse/ClickHouse/pull/67731) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Revert "Slightly better calculation of primary index""'. [#67846](https://github.com/ClickHouse/ClickHouse/pull/67846) ([Anton Popov](https://github.com/CurtizJ)). +* NO CL ENTRY: 'Revert "CI: Strict job timeout 1.5h for tests, 2h for builds"'. [#67986](https://github.com/ClickHouse/ClickHouse/pull/67986) ([Max K.](https://github.com/maxknv)). +* NO CL ENTRY: 'Revert "Bump rocksdb from v8.10 to v9.4 + enable jemalloc and liburing"'. [#68014](https://github.com/ClickHouse/ClickHouse/pull/68014) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* NO CL ENTRY: 'Revert "Use `Atomic` database by default in `clickhouse-local`"'. [#68023](https://github.com/ClickHouse/ClickHouse/pull/68023) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Refactor tests for (experimental) statistics"'. [#68156](https://github.com/ClickHouse/ClickHouse/pull/68156) ([Alexander Tokmakov](https://github.com/tavplubix)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* CI: enable libfuzzer (fixing build and docker). [#61908](https://github.com/ClickHouse/ClickHouse/pull/61908) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Initial implementation of vector similarity index. [#63675](https://github.com/ClickHouse/ClickHouse/pull/63675) ([Robert Schulze](https://github.com/rschu1ze)). +* Update zlib-ng from 2.0.2 to 2.1.7. [#64489](https://github.com/ClickHouse/ClickHouse/pull/64489) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix 02444_async_broken_outdated_part_loading flakiness. [#64956](https://github.com/ClickHouse/ClickHouse/pull/64956) ([Azat Khuzhin](https://github.com/azat)). +* attach_gdb.lib: print more information before all stacks. [#65253](https://github.com/ClickHouse/ClickHouse/pull/65253) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix some perf tests. [#65320](https://github.com/ClickHouse/ClickHouse/pull/65320) ([Nikita Taranov](https://github.com/nickitat)). +* Remove ActionsDAGPtr whenever it is possible. [#65414](https://github.com/ClickHouse/ClickHouse/pull/65414) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Scheduler queue throughput introspection, Fix CPU indication in client. [#65654](https://github.com/ClickHouse/ClickHouse/pull/65654) ([Sergei Trifonov](https://github.com/serxa)). +* Increase timeout in 02122_join_group_by_timeout for tsan build. [#65976](https://github.com/ClickHouse/ClickHouse/pull/65976) ([vdimir](https://github.com/vdimir)). +* Remove default values for certificateFile/privateKeyFile/dhParamsFile in keeper config (to avoid annoying errors in logs). [#65978](https://github.com/ClickHouse/ClickHouse/pull/65978) ([Azat Khuzhin](https://github.com/azat)). +* Update version_date.tsv and changelogs after v24.3.5.46-lts. [#66054](https://github.com/ClickHouse/ClickHouse/pull/66054) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix flaky `02814_currentDatabase_for_table_functions`. [#66111](https://github.com/ClickHouse/ClickHouse/pull/66111) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix possible data-race StorageKafka with statistics_interval_ms>0. [#66311](https://github.com/ClickHouse/ClickHouse/pull/66311) ([Azat Khuzhin](https://github.com/azat)). +* Avoid unneeded calculation in SeriesPeriodDetect. [#66320](https://github.com/ClickHouse/ClickHouse/pull/66320) ([Ruihang Xia](https://github.com/waynexia)). +* It aims to complete [#58630](https://github.com/ClickHouse/ClickHouse/issues/58630). This is made possible by [#60463](https://github.com/ClickHouse/ClickHouse/issues/60463), [#61459](https://github.com/ClickHouse/ClickHouse/issues/61459) and [#60082](https://github.com/ClickHouse/ClickHouse/issues/60082). [#66443](https://github.com/ClickHouse/ClickHouse/pull/66443) ([Amos Bird](https://github.com/amosbird)). +* Allow run query instantly in play. [#66457](https://github.com/ClickHouse/ClickHouse/pull/66457) ([Aleksandr Musorin](https://github.com/AVMusorin)). +* Bump ICU from v70 to v75. [#66474](https://github.com/ClickHouse/ClickHouse/pull/66474) ([Robert Schulze](https://github.com/rschu1ze)). +* Bump RocksDB from v6.29.5 to v7.10.2. [#66475](https://github.com/ClickHouse/ClickHouse/pull/66475) ([Robert Schulze](https://github.com/rschu1ze)). +* Bump RocksDB from v7.10.2 to v8.9.1. [#66479](https://github.com/ClickHouse/ClickHouse/pull/66479) ([Robert Schulze](https://github.com/rschu1ze)). +* I believe the error code for this function should not be "NOT_ALLOWED" since it simply is an invalid query and "BAD_QUERY_PARAMETER" is a more reasonable error code for this. [#66491](https://github.com/ClickHouse/ClickHouse/pull/66491) ([Ali](https://github.com/xogoodnow)). +* Update gdb to 15.1 (by compiling from sources). [#66494](https://github.com/ClickHouse/ClickHouse/pull/66494) ([Azat Khuzhin](https://github.com/azat)). +* Ensure that llvm-symbolizer is used for symbolizing sanitizer reports. [#66495](https://github.com/ClickHouse/ClickHouse/pull/66495) ([Azat Khuzhin](https://github.com/azat)). +* Remove unused local variables. [#66503](https://github.com/ClickHouse/ClickHouse/pull/66503) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* This will solve a lot of problems with inconsistent formatting. And it opens the path for [#65753](https://github.com/ClickHouse/ClickHouse/issues/65753). This closes [#66807](https://github.com/ClickHouse/ClickHouse/issues/66807). This closes [#61611](https://github.com/ClickHouse/ClickHouse/issues/61611). This closes [#61711](https://github.com/ClickHouse/ClickHouse/issues/61711). This closes [#67445](https://github.com/ClickHouse/ClickHouse/issues/67445). [#66506](https://github.com/ClickHouse/ClickHouse/pull/66506) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Rename Context::getSettings() to Context::getSettingsCopy(). [#66528](https://github.com/ClickHouse/ClickHouse/pull/66528) ([Raúl Marín](https://github.com/Algunenano)). +* Uninteresting change: introducing `ClientApplicationBase`. [#66549](https://github.com/ClickHouse/ClickHouse/pull/66549) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Respond to parallel replicas protocol requests with priority on initiator. [#66618](https://github.com/ClickHouse/ClickHouse/pull/66618) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix bad code: it was catching exceptions. [#66628](https://github.com/ClickHouse/ClickHouse/pull/66628) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Dump all memory stats in CgroupsMemoryUsageObserver on hitting the limit. [#66732](https://github.com/ClickHouse/ClickHouse/pull/66732) ([Nikita Taranov](https://github.com/nickitat)). +* Save writer thread id in shared mutex for debugging. [#66745](https://github.com/ClickHouse/ClickHouse/pull/66745) ([Alexander Gololobov](https://github.com/davenger)). +* Increase asio pool size in case the server is tiny. [#66761](https://github.com/ClickHouse/ClickHouse/pull/66761) ([alesapin](https://github.com/alesapin)). +* Looks like it runs too many mutations sometimes and fails to process them within the timeout. So if a query waits for mutations - the test fails. [#66785](https://github.com/ClickHouse/ClickHouse/pull/66785) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Better diagnostics in functional tests. [#66790](https://github.com/ClickHouse/ClickHouse/pull/66790) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix bad test `01042_system_reload_dictionary_reloads_completely`. [#66811](https://github.com/ClickHouse/ClickHouse/pull/66811) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Something is strange with the test about refreshable materialized views. [#66816](https://github.com/ClickHouse/ClickHouse/pull/66816) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Randomize `trace_profile_events`. [#66821](https://github.com/ClickHouse/ClickHouse/pull/66821) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Stop ignoring SIGSEGV in GDB. [#66847](https://github.com/ClickHouse/ClickHouse/pull/66847) ([Antonio Andelic](https://github.com/antonio2368)). +* Followup [#66725](https://github.com/ClickHouse/ClickHouse/issues/66725). [#66869](https://github.com/ClickHouse/ClickHouse/pull/66869) ([vdimir](https://github.com/vdimir)). +* When executing queries with parallel replicas that involve only a subset of nodes within a shard, the current behavior is that if all participating replicas are unavailable, the query completes without any errors but returns no results. Referencing issue [#65467](https://github.com/ClickHouse/ClickHouse/issues/65467), this pull request addresses the issue where only a portion of the nodes in a shard are participating in the execution. [#66880](https://github.com/ClickHouse/ClickHouse/pull/66880) ([zoomxi](https://github.com/zoomxi)). +* Speed up stateful tests setup. [#66886](https://github.com/ClickHouse/ClickHouse/pull/66886) ([Raúl Marín](https://github.com/Algunenano)). +* Functions [h-r]*: Iterate over input_rows_count where appropriate. [#66897](https://github.com/ClickHouse/ClickHouse/pull/66897) ([Robert Schulze](https://github.com/rschu1ze)). +* Stateless tests: Change status of failed tests in case of server crash and add no-parallel to high-load tests. [#66901](https://github.com/ClickHouse/ClickHouse/pull/66901) ([Nikita Fomichev](https://github.com/fm4v)). +* Fix performance test about the generateRandom table function, supposedly. [#66906](https://github.com/ClickHouse/ClickHouse/pull/66906) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix bad tests `share_big_sets`, CC @davenger. [#66908](https://github.com/ClickHouse/ClickHouse/pull/66908) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Speed up mutations for non-replicated MergeTree a bit. [#66909](https://github.com/ClickHouse/ClickHouse/pull/66909) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Speed up mutations for non-replicated MergeTree significantly. [#66911](https://github.com/ClickHouse/ClickHouse/pull/66911) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix views over distributed tables with Analyzer. [#66912](https://github.com/ClickHouse/ClickHouse/pull/66912) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* [CI fest] Try to fix `test_broken_projections/test.py::test_broken_ignored_replicated`. [#66915](https://github.com/ClickHouse/ClickHouse/pull/66915) ([Andrey Zvonov](https://github.com/zvonand)). +* Decrease rate limit in `01923_network_receive_time_metric_insert`. [#66924](https://github.com/ClickHouse/ClickHouse/pull/66924) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Grouparrayintersect: fix serialization bug. [#66928](https://github.com/ClickHouse/ClickHouse/pull/66928) ([Raúl Marín](https://github.com/Algunenano)). +* Update version after release branch. [#66929](https://github.com/ClickHouse/ClickHouse/pull/66929) ([Raúl Marín](https://github.com/Algunenano)). +* Un-flake test_runtime_configurable_cache_size. [#66934](https://github.com/ClickHouse/ClickHouse/pull/66934) ([Robert Schulze](https://github.com/rschu1ze)). +* fix unit tests ResolvePoolTest with timeouts. [#66953](https://github.com/ClickHouse/ClickHouse/pull/66953) ([Sema Checherinda](https://github.com/CheSema)). +* Split slow test 03036_dynamic_read_subcolumns. [#66954](https://github.com/ClickHouse/ClickHouse/pull/66954) ([Nikita Taranov](https://github.com/nickitat)). +* CI: Fixes docker server build for release branches. [#66955](https://github.com/ClickHouse/ClickHouse/pull/66955) ([Max K.](https://github.com/maxknv)). +* Addressing issue [#64936](https://github.com/ClickHouse/ClickHouse/issues/64936). [#66973](https://github.com/ClickHouse/ClickHouse/pull/66973) ([alesapin](https://github.com/alesapin)). +* Add initial 24.7 changelog. [#66976](https://github.com/ClickHouse/ClickHouse/pull/66976) ([Raúl Marín](https://github.com/Algunenano)). +* Apply libunwind fix. [#66977](https://github.com/ClickHouse/ClickHouse/pull/66977) ([Michael Kolupaev](https://github.com/al13n321)). +* CI: Add logs for debugging. [#66979](https://github.com/ClickHouse/ClickHouse/pull/66979) ([Max K.](https://github.com/maxknv)). +* [CI Fest] Split dynamic tests and rewrite them from sh to sql to avoid timeouts. [#66981](https://github.com/ClickHouse/ClickHouse/pull/66981) ([Kruglov Pavel](https://github.com/Avogar)). +* Split 01508_partition_pruning_long. [#66983](https://github.com/ClickHouse/ClickHouse/pull/66983) ([Nikita Taranov](https://github.com/nickitat)). +* [CI Fest] Fix use-of-uninitialized-value in JSONExtract* numeric functions. [#66984](https://github.com/ClickHouse/ClickHouse/pull/66984) ([Kruglov Pavel](https://github.com/Avogar)). +* It should fix SQLancer checks, but for some reason we stopped invalidating cache for docker builds and fix was not published to our CI for a while. [#66987](https://github.com/ClickHouse/ClickHouse/pull/66987) ([Ilya Yatsishin](https://github.com/qoega)). +* Fixes [#66941](https://github.com/ClickHouse/ClickHouse/issues/66941). [#66991](https://github.com/ClickHouse/ClickHouse/pull/66991) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Remove the support for Kerberized HDFS. [#66998](https://github.com/ClickHouse/ClickHouse/pull/66998) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* CI: Fix for workflow results parsing. [#67000](https://github.com/ClickHouse/ClickHouse/pull/67000) ([Max K.](https://github.com/maxknv)). +* Fix flaky `01454_storagememory_data_race_challenge`. [#67003](https://github.com/ClickHouse/ClickHouse/pull/67003) ([Antonio Andelic](https://github.com/antonio2368)). +* CI: Jepsen Workflow fix for skipped builds and observability. [#67004](https://github.com/ClickHouse/ClickHouse/pull/67004) ([Max K.](https://github.com/maxknv)). +* bugfix AttachedTable counting not symmetry, and adding some test logs…. [#67007](https://github.com/ClickHouse/ClickHouse/pull/67007) ([Xu Jia](https://github.com/XuJia0210)). +* CI: Automerge when required and non-required checks completed. [#67008](https://github.com/ClickHouse/ClickHouse/pull/67008) ([Max K.](https://github.com/maxknv)). +* Fix test `very_long_arrays`. [#67009](https://github.com/ClickHouse/ClickHouse/pull/67009) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Try to fix exception logging in destructors of static objects. [#67016](https://github.com/ClickHouse/ClickHouse/pull/67016) ([Antonio Andelic](https://github.com/antonio2368)). +* [Green CI] Fix test test_storage_azure_blob_storage. [#67019](https://github.com/ClickHouse/ClickHouse/pull/67019) ([Daniil Ivanik](https://github.com/divanik)). +* Integration tests: fix flaky tests `test_backup_restore_on_cluster/test_concurrency.py` & `test_manipulate_statistics/test.py`. [#67027](https://github.com/ClickHouse/ClickHouse/pull/67027) ([Nikita Fomichev](https://github.com/fm4v)). +* [Green CI] Fix test test_storage_s3_queue/test.py::test_max_set_age. [#67035](https://github.com/ClickHouse/ClickHouse/pull/67035) ([Pablo Marcos](https://github.com/pamarcos)). +* Test for alter select with parallel replicas. [#67041](https://github.com/ClickHouse/ClickHouse/pull/67041) ([Igor Nikonov](https://github.com/devcrafter)). +* Split query into multiple queries to consume less memory at once + use less data. Fixes [#67034](https://github.com/ClickHouse/ClickHouse/issues/67034). [#67044](https://github.com/ClickHouse/ClickHouse/pull/67044) ([alesapin](https://github.com/alesapin)). +* Disable setting `optimize_functions_to_subcolumns`. [#67046](https://github.com/ClickHouse/ClickHouse/pull/67046) ([Anton Popov](https://github.com/CurtizJ)). +* Increase max allocation size for sanitizers. [#67049](https://github.com/ClickHouse/ClickHouse/pull/67049) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* rewrite 01171 test. [#67054](https://github.com/ClickHouse/ClickHouse/pull/67054) ([Sema Checherinda](https://github.com/CheSema)). +* Add `**` to `hdfs` docs, add test for `**` in `hdfs`. [#67064](https://github.com/ClickHouse/ClickHouse/pull/67064) ([Andrey Zvonov](https://github.com/zvonand)). +* Very sad failure: ``` 2024.07.24 13:28:45.517777 [ 10 ] {08745bf9-4bc1-4946-b9a8-c03d82ec55dc} executeQuery: (from 172.16.11.1:55890) OPTIMIZE TABLE replicated_mt FINAL (stage: Complete) 2024.07.24 13:28:45.525945 [ 10 ] {08745bf9-4bc1-4946-b9a8-c03d82ec55dc} default.replicated_mt (ReplicatedMergeTreeQueue): Waiting for 4 entries to be processed: queue-0000000004, queue-0000000002, queue-0000000001, queue-0000000000 2024.07.24 13:29:15.528024 [ 10 ] {08745bf9-4bc1-4946-b9a8-c03d82ec55dc} default.replicated_mt (6581a6fb-8458-466d-8350-89951eb1ac8e) (MergerMutator): Selected 3 parts from all_0_0_0 to all_2_2_0 2024.07.24 13:29:15.530736 [ 10 ] {08745bf9-4bc1-4946-b9a8-c03d82ec55dc} default.replicated_mt (6581a6fb-8458-466d-8350-89951eb1ac8e): Created log entry /clickhouse/tables/replicated_mt/log/log-0000000004 for merge all_0_2_1 2024.07.24 13:29:15.530873 [ 10 ] {08745bf9-4bc1-4946-b9a8-c03d82ec55dc} default.replicated_mt (6581a6fb-8458-466d-8350-89951eb1ac8e): Waiting for node1 to process log entry 2024.07.24 13:29:15.530919 [ 10 ] {08745bf9-4bc1-4946-b9a8-c03d82ec55dc} default.replicated_mt (6581a6fb-8458-466d-8350-89951eb1ac8e): Waiting for node1 to pull log-0000000004 to queue 2024.07.24 13:29:15.534286 [ 10 ] {08745bf9-4bc1-4946-b9a8-c03d82ec55dc} default.replicated_mt (6581a6fb-8458-466d-8350-89951eb1ac8e): Looking for node corresponding to log-0000000004 in node1 queue 2024.07.24 13:29:15.534793 [ 10 ] {08745bf9-4bc1-4946-b9a8-c03d82ec55dc} default.replicated_mt (6581a6fb-8458-466d-8350-89951eb1ac8e): Waiting for queue-0000000005 to disappear from node1 queue 2024.07.24 13:29:15.585533 [ 10 ] {08745bf9-4bc1-4946-b9a8-c03d82ec55dc} TCPHandler: Processed in 30.067804125 sec. ```. [#67067](https://github.com/ClickHouse/ClickHouse/pull/67067) ([alesapin](https://github.com/alesapin)). +* Fix flaky `test_seekable_formats_url` and `test_seekable_formats` S3 storage tests. [#67070](https://github.com/ClickHouse/ClickHouse/pull/67070) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* CI: Docker server build fix for new release workflow. [#67075](https://github.com/ClickHouse/ClickHouse/pull/67075) ([Max K.](https://github.com/maxknv)). +* Fix 2680 flasky. [#67078](https://github.com/ClickHouse/ClickHouse/pull/67078) ([jsc0218](https://github.com/jsc0218)). +* [CI Fest] Fix flaky 02447_drop_replica test. [#67085](https://github.com/ClickHouse/ClickHouse/pull/67085) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fixes [#67030](https://github.com/ClickHouse/ClickHouse/issues/67030). [#67086](https://github.com/ClickHouse/ClickHouse/pull/67086) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Increase timeout for curl in tests. [#67089](https://github.com/ClickHouse/ClickHouse/pull/67089) ([Anton Popov](https://github.com/CurtizJ)). +* Try calculating available memory if ClickHouse is bound to subset of NUMA nodes. [#67098](https://github.com/ClickHouse/ClickHouse/pull/67098) ([Antonio Andelic](https://github.com/antonio2368)). +* A more precise way of tracking flushing time in 01246_buffer_flush. [#67099](https://github.com/ClickHouse/ClickHouse/pull/67099) ([Azat Khuzhin](https://github.com/azat)). +* Do not fail CheckReadyForMerge on failed Tests_2 (non-required jobs) Do not skip CiBuddy report step on failures. [#67101](https://github.com/ClickHouse/ClickHouse/pull/67101) ([Max K.](https://github.com/maxknv)). +* Tststs_1 - for all required checks Tststs_2 - for all non-required checks (normal mode) Tststs_2_ww - for all non-required checks (woolenwolfdog mode). [#67104](https://github.com/ClickHouse/ClickHouse/pull/67104) ([Max K.](https://github.com/maxknv)). +* Functions [s-t]*: Iterate over input_rows_count where appropriate. [#67105](https://github.com/ClickHouse/ClickHouse/pull/67105) ([Robert Schulze](https://github.com/rschu1ze)). +* Reintroduce 02805_distributed_queries_timeouts. [#67106](https://github.com/ClickHouse/ClickHouse/pull/67106) ([Azat Khuzhin](https://github.com/azat)). +* Added some tests in relation with [#54881](https://github.com/ClickHouse/ClickHouse/issues/54881). [#67110](https://github.com/ClickHouse/ClickHouse/pull/67110) ([max-vostrikov](https://github.com/max-vostrikov)). +* Reintroduce 03002_part_log_rmt_fetch_*_error tests without flakiness and less time. [#67113](https://github.com/ClickHouse/ClickHouse/pull/67113) ([Azat Khuzhin](https://github.com/azat)). +* Improve tag matching in backport scripts. [#67118](https://github.com/ClickHouse/ClickHouse/pull/67118) ([Raúl Marín](https://github.com/Algunenano)). +* Fixes [#67111](https://github.com/ClickHouse/ClickHouse/issues/67111). [#67121](https://github.com/ClickHouse/ClickHouse/pull/67121) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Increase lock_acquire_timeout_for_background_operations setting in dynamic merges tests. [#67126](https://github.com/ClickHouse/ClickHouse/pull/67126) ([Kruglov Pavel](https://github.com/Avogar)). +* Attempt to fix flakiness of some window view tests. [#67130](https://github.com/ClickHouse/ClickHouse/pull/67130) ([Robert Schulze](https://github.com/rschu1ze)). +* Update assert in cache. [#67138](https://github.com/ClickHouse/ClickHouse/pull/67138) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix test `00673_subquery_prepared_set_performance`. [#67141](https://github.com/ClickHouse/ClickHouse/pull/67141) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fixes [#67047](https://github.com/ClickHouse/ClickHouse/issues/67047). [#67142](https://github.com/ClickHouse/ClickHouse/pull/67142) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Lower max allocation size in query fuzzer. [#67145](https://github.com/ClickHouse/ClickHouse/pull/67145) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fixes [#66966](https://github.com/ClickHouse/ClickHouse/issues/66966). [#67147](https://github.com/ClickHouse/ClickHouse/pull/67147) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Try fix `02481_async_insert_race_long` flakiness. [#67148](https://github.com/ClickHouse/ClickHouse/pull/67148) ([Julia Kartseva](https://github.com/jkartseva)). +* Rename (unreleased) bad setting. [#67149](https://github.com/ClickHouse/ClickHouse/pull/67149) ([Raúl Marín](https://github.com/Algunenano)). +* Uncomment accidentally commented out code in QueryProfiler. [#67152](https://github.com/ClickHouse/ClickHouse/pull/67152) ([Michael Kolupaev](https://github.com/al13n321)). +* Try to fix 2572. [#67158](https://github.com/ClickHouse/ClickHouse/pull/67158) ([jsc0218](https://github.com/jsc0218)). +* Fix benign data race in ZooKeeper. [#67164](https://github.com/ClickHouse/ClickHouse/pull/67164) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove too long unit test. [#67168](https://github.com/ClickHouse/ClickHouse/pull/67168) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix `00705_drop_create_merge_tree`. [#67170](https://github.com/ClickHouse/ClickHouse/pull/67170) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix stacktrace cache. [#67173](https://github.com/ClickHouse/ClickHouse/pull/67173) ([Antonio Andelic](https://github.com/antonio2368)). +* Fixes [#67151](https://github.com/ClickHouse/ClickHouse/issues/67151). [#67174](https://github.com/ClickHouse/ClickHouse/pull/67174) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Make 02908_many_requests_to_system_replicas less stressful. [#67176](https://github.com/ClickHouse/ClickHouse/pull/67176) ([Alexander Gololobov](https://github.com/davenger)). +* Reduce max time of 00763_long_lock_buffer_alter_destination_table. [#67185](https://github.com/ClickHouse/ClickHouse/pull/67185) ([Raúl Marín](https://github.com/Algunenano)). +* Do not count AttachedTable for tables in information schema databases. [#67187](https://github.com/ClickHouse/ClickHouse/pull/67187) ([Sergei Trifonov](https://github.com/serxa)). +* Verbose output for 03203_client_benchmark_options. [#67188](https://github.com/ClickHouse/ClickHouse/pull/67188) ([vdimir](https://github.com/vdimir)). +* Split test 02967_parallel_replicas_join_algo_and_analyzer. [#67211](https://github.com/ClickHouse/ClickHouse/pull/67211) ([Nikita Taranov](https://github.com/nickitat)). +* Fix flaky `test_pkill_query_log` (tsan). [#67223](https://github.com/ClickHouse/ClickHouse/pull/67223) ([Sergei Trifonov](https://github.com/serxa)). +* Remove integration test `test_broken_projections_in_backups_1`. [#67231](https://github.com/ClickHouse/ClickHouse/pull/67231) ([Vitaly Baranov](https://github.com/vitlibar)). +* Debug logging for [#67002](https://github.com/ClickHouse/ClickHouse/issues/67002). [#67233](https://github.com/ClickHouse/ClickHouse/pull/67233) ([Nikita Taranov](https://github.com/nickitat)). +* Fix oss-fuzz build. [#67235](https://github.com/ClickHouse/ClickHouse/pull/67235) ([Nikita Taranov](https://github.com/nickitat)). +* Fix flaky 00180_no_seek_avoiding_when_reading_from_cache. [#67236](https://github.com/ClickHouse/ClickHouse/pull/67236) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* English. [#67258](https://github.com/ClickHouse/ClickHouse/pull/67258) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove strange code. [#67260](https://github.com/ClickHouse/ClickHouse/pull/67260) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix MSan report in DatabaseReplicated. [#67262](https://github.com/ClickHouse/ClickHouse/pull/67262) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test `02310_clickhouse_local_INSERT_progress_profile_events`. [#67264](https://github.com/ClickHouse/ClickHouse/pull/67264) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove test `02982_aggregation_states_destruction`. [#67266](https://github.com/ClickHouse/ClickHouse/pull/67266) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix OOM in test runs. [#67268](https://github.com/ClickHouse/ClickHouse/pull/67268) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove harmful stuff from tests. [#67275](https://github.com/ClickHouse/ClickHouse/pull/67275) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test `03201_variant_null_map_subcolumn`. [#67276](https://github.com/ClickHouse/ClickHouse/pull/67276) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Split `01651_lc_insert_tiny_log`. [#67279](https://github.com/ClickHouse/ClickHouse/pull/67279) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Debug test `02490_benchmark_max_consecutive_errors`. [#67281](https://github.com/ClickHouse/ClickHouse/pull/67281) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix bad test `02833_concurrrent_sessions`. [#67282](https://github.com/ClickHouse/ClickHouse/pull/67282) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a separate test for exception handling. [#67283](https://github.com/ClickHouse/ClickHouse/pull/67283) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Debug test `01600_parts_states_metrics_long`. [#67284](https://github.com/ClickHouse/ClickHouse/pull/67284) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Faster test `02231_buffer_aggregate_states_leak`. [#67285](https://github.com/ClickHouse/ClickHouse/pull/67285) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix curiosities in `TimerDescriptor`. [#67287](https://github.com/ClickHouse/ClickHouse/pull/67287) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add retries to test `02911_backup_restore_keeper_map`. [#67290](https://github.com/ClickHouse/ClickHouse/pull/67290) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Functions: Iterate over input_rows_count where appropriate. [#67294](https://github.com/ClickHouse/ClickHouse/pull/67294) ([Robert Schulze](https://github.com/rschu1ze)). +* Add documentation for `compile_expressions`. [#67300](https://github.com/ClickHouse/ClickHouse/pull/67300) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Wrap log lines in the CI report for functional tests. [#67301](https://github.com/ClickHouse/ClickHouse/pull/67301) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix bad test `02050_client_profile_events`. [#67309](https://github.com/ClickHouse/ClickHouse/pull/67309) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* I do not want to think about this code. [#67312](https://github.com/ClickHouse/ClickHouse/pull/67312) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test `00940_max_parts_in_total`. [#67313](https://github.com/ClickHouse/ClickHouse/pull/67313) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Tests for Kafka cannot run in parallel. [#67315](https://github.com/ClickHouse/ClickHouse/pull/67315) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#66285](https://github.com/ClickHouse/ClickHouse/issues/66285). [#67325](https://github.com/ClickHouse/ClickHouse/pull/67325) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Follow-up for [#67301](https://github.com/ClickHouse/ClickHouse/issues/67301). [#67327](https://github.com/ClickHouse/ClickHouse/pull/67327) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#61659](https://github.com/ClickHouse/ClickHouse/issues/61659). [#67332](https://github.com/ClickHouse/ClickHouse/pull/67332) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix integration test `test_backup_restore_on_cluster/test_disallow_concurrency`. [#67336](https://github.com/ClickHouse/ClickHouse/pull/67336) ([Vitaly Baranov](https://github.com/vitlibar)). +* Faster and less flaky 01246_buffer_flush (by using HTTP over clickhouse-client). [#67340](https://github.com/ClickHouse/ClickHouse/pull/67340) ([Azat Khuzhin](https://github.com/azat)). +* Fix: data race in TCPHandler on socket timeouts settings. [#67341](https://github.com/ClickHouse/ClickHouse/pull/67341) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* CI: Print stdout, stderr for docker pull command. [#67343](https://github.com/ClickHouse/ClickHouse/pull/67343) ([Max K.](https://github.com/maxknv)). +* Followup [#67290](https://github.com/ClickHouse/ClickHouse/issues/67290). [#67348](https://github.com/ClickHouse/ClickHouse/pull/67348) ([vdimir](https://github.com/vdimir)). +* Skip parallel for `test_storage_kerberized_kafka`. [#67349](https://github.com/ClickHouse/ClickHouse/pull/67349) ([Andrey Zvonov](https://github.com/zvonand)). +* Don't use PeekableReadBuffer in JSONAsObject format. [#67354](https://github.com/ClickHouse/ClickHouse/pull/67354) ([Kruglov Pavel](https://github.com/Avogar)). +* This closes: [#57316](https://github.com/ClickHouse/ClickHouse/issues/57316). [#67355](https://github.com/ClickHouse/ClickHouse/pull/67355) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Remove duplicated tests. [#67357](https://github.com/ClickHouse/ClickHouse/pull/67357) ([Kruglov Pavel](https://github.com/Avogar)). +* Release branch was not detected properly and job which must run on release branch could be reused from feature branches. PR Fixes detection of release branches. [#67358](https://github.com/ClickHouse/ClickHouse/pull/67358) ([Max K.](https://github.com/maxknv)). +* Disable some Dynamic tests under sanitizers, rewrite 03202_dynamic_null_map_subcolumn to sql. [#67359](https://github.com/ClickHouse/ClickHouse/pull/67359) ([Kruglov Pavel](https://github.com/Avogar)). +* Add no-distributed-cache tag in tests. [#67361](https://github.com/ClickHouse/ClickHouse/pull/67361) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Trying to fix test_cache_evicted_by_temporary_data and print debug info. [#67362](https://github.com/ClickHouse/ClickHouse/pull/67362) ([vdimir](https://github.com/vdimir)). +* Try to fix: ALL_CONNECTION_TRIES_FAILED with parallel replicas. [#67389](https://github.com/ClickHouse/ClickHouse/pull/67389) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix bad test `01036_no_superfluous_dict_reload_on_create_database`. [#67390](https://github.com/ClickHouse/ClickHouse/pull/67390) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Adding `SelectedPartsTotal` and `SelectedMarksTotal` as new ProfileEvents. [#67393](https://github.com/ClickHouse/ClickHouse/pull/67393) ([Jordi Villar](https://github.com/jrdi)). +* Print debug info in `test_storage_s3_queue/test.py::test_shards_distributed`. [#67394](https://github.com/ClickHouse/ClickHouse/pull/67394) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Mute degraded perf test. [#67396](https://github.com/ClickHouse/ClickHouse/pull/67396) ([Nikita Taranov](https://github.com/nickitat)). +* Debug TimerDescriptor. [#67397](https://github.com/ClickHouse/ClickHouse/pull/67397) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove has_single_port property from plan stream. [#67398](https://github.com/ClickHouse/ClickHouse/pull/67398) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix typo. [#67400](https://github.com/ClickHouse/ClickHouse/pull/67400) ([Halersson Paris](https://github.com/halersson)). +* CI: Fix build results for release branches. [#67402](https://github.com/ClickHouse/ClickHouse/pull/67402) ([Max K.](https://github.com/maxknv)). +* Disable 02932_refreshable_materialized_views. [#67404](https://github.com/ClickHouse/ClickHouse/pull/67404) ([Michael Kolupaev](https://github.com/al13n321)). +* Follow-up to [#67294](https://github.com/ClickHouse/ClickHouse/issues/67294). [#67405](https://github.com/ClickHouse/ClickHouse/pull/67405) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix DWARF range list parsing in stack symbolizer. [#67417](https://github.com/ClickHouse/ClickHouse/pull/67417) ([Michael Kolupaev](https://github.com/al13n321)). +* Make Dwarf::findAddress() fallback slow path less slow. [#67418](https://github.com/ClickHouse/ClickHouse/pull/67418) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix inconsistent formatting of CODEC and STATISTICS. [#67421](https://github.com/ClickHouse/ClickHouse/pull/67421) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Reduced complexity of the test 02832_alter_max_sessions_for_user. [#67425](https://github.com/ClickHouse/ClickHouse/pull/67425) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Remove obsolete `--multiquery` parameter from tests. [#67435](https://github.com/ClickHouse/ClickHouse/pull/67435) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix public backports. [#67439](https://github.com/ClickHouse/ClickHouse/pull/67439) ([Raúl Marín](https://github.com/Algunenano)). +* Bump Azure from v1.12 to v1.13. [#67446](https://github.com/ClickHouse/ClickHouse/pull/67446) ([Robert Schulze](https://github.com/rschu1ze)). +* 24.7 add missing documentation and testing. [#67454](https://github.com/ClickHouse/ClickHouse/pull/67454) ([Nikita Fomichev](https://github.com/fm4v)). +* Use correct order of fields in `StorageURLSource`. [#67455](https://github.com/ClickHouse/ClickHouse/pull/67455) ([Antonio Andelic](https://github.com/antonio2368)). +* run 01171 test in parallel. [#67470](https://github.com/ClickHouse/ClickHouse/pull/67470) ([Sema Checherinda](https://github.com/CheSema)). +* [Green CI] Fix WriteBuffer destructor when finalize has failed for MergeTreeDeduplicationLog::shutdown. [#67474](https://github.com/ClickHouse/ClickHouse/pull/67474) ([Alexey Katsman](https://github.com/alexkats)). +* Reduce 02473_multistep_prewhere run time. [#67475](https://github.com/ClickHouse/ClickHouse/pull/67475) ([Alexander Gololobov](https://github.com/davenger)). +* Update version_date.tsv and changelogs after v24.7.1.2915-stable. [#67483](https://github.com/ClickHouse/ClickHouse/pull/67483) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Revert [#61750](https://github.com/ClickHouse/ClickHouse/issues/61750) "Improve JSONEachRow reading by ignoring the keys case". [#67484](https://github.com/ClickHouse/ClickHouse/pull/67484) ([Michael Kolupaev](https://github.com/al13n321)). +* Disable parallel run for `01923_network_receive_time_metric_insert.sh`. [#67492](https://github.com/ClickHouse/ClickHouse/pull/67492) ([Julia Kartseva](https://github.com/jkartseva)). +* Fix test `test_backup_restore_on_cluster/test.py::test_mutation`. [#67494](https://github.com/ClickHouse/ClickHouse/pull/67494) ([Vitaly Baranov](https://github.com/vitlibar)). +* [Green CI] Fix potentially flaky test_mask_sensitive_info integration test. [#67506](https://github.com/ClickHouse/ClickHouse/pull/67506) ([Alexey Katsman](https://github.com/alexkats)). +* [Green CI] Test `test_storage_azure_blob_storage/test.py` is flaky. [#67512](https://github.com/ClickHouse/ClickHouse/pull/67512) ([Daniil Ivanik](https://github.com/divanik)). +* Prepare Release workflow for production. [#67523](https://github.com/ClickHouse/ClickHouse/pull/67523) ([Max K.](https://github.com/maxknv)). +* Fix upgrade check. [#67524](https://github.com/ClickHouse/ClickHouse/pull/67524) ([Raúl Marín](https://github.com/Algunenano)). +* [Green CI] test 03164_s3_settings_for_queries_and_merges is flaky. [#67535](https://github.com/ClickHouse/ClickHouse/pull/67535) ([Daniil Ivanik](https://github.com/divanik)). +* Log message and increased concurrency for table removal. [#67537](https://github.com/ClickHouse/ClickHouse/pull/67537) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix AsyncLoader destruction race. [#67553](https://github.com/ClickHouse/ClickHouse/pull/67553) ([Sergei Trifonov](https://github.com/serxa)). +* Add an assert into TimerDescriptor. [#67555](https://github.com/ClickHouse/ClickHouse/pull/67555) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Re-enable ICU on s390/x. [#67557](https://github.com/ClickHouse/ClickHouse/pull/67557) ([Robert Schulze](https://github.com/rschu1ze)). +* Update version_date.tsv and changelogs after v24.4.4.107-stable. [#67559](https://github.com/ClickHouse/ClickHouse/pull/67559) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Sometimes we fail with timeout in stateless tests and the reason for that seems to be in `stop_logs_replication` step. Add a check for timeout here. [#67560](https://github.com/ClickHouse/ClickHouse/pull/67560) ([Nikolay Degterinsky](https://github.com/evillique)). +* Miscellaneous. [#67564](https://github.com/ClickHouse/ClickHouse/pull/67564) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* enable parallel_view_processing in perf tests. [#67565](https://github.com/ClickHouse/ClickHouse/pull/67565) ([Sema Checherinda](https://github.com/CheSema)). +* Fix flaky `test_system_kafka_consumers_rebalance`. [#67566](https://github.com/ClickHouse/ClickHouse/pull/67566) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Update version_date.tsv and changelogs after v24.7.2.13-stable. [#67586](https://github.com/ClickHouse/ClickHouse/pull/67586) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix 01811_storage_buffer_flush_parameters flakiness. [#67589](https://github.com/ClickHouse/ClickHouse/pull/67589) ([Azat Khuzhin](https://github.com/azat)). +* Fix test_zookeeper_config_load_balancing after adding the xdist worker name to the instance. [#67590](https://github.com/ClickHouse/ClickHouse/pull/67590) ([Pablo Marcos](https://github.com/pamarcos)). +* Update minio in integration tests. [#67595](https://github.com/ClickHouse/ClickHouse/pull/67595) ([Antonio Andelic](https://github.com/antonio2368)). +* added tests for page index in parquet files. [#67596](https://github.com/ClickHouse/ClickHouse/pull/67596) ([max-vostrikov](https://github.com/max-vostrikov)). +* Update check_rabbitmq_is_available. [#67597](https://github.com/ClickHouse/ClickHouse/pull/67597) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix 02434_cancel_insert_when_client_dies. [#67600](https://github.com/ClickHouse/ClickHouse/pull/67600) ([vdimir](https://github.com/vdimir)). +* Fix 02910_bad_logs_level_in_local in fast tests. [#67603](https://github.com/ClickHouse/ClickHouse/pull/67603) ([Raúl Marín](https://github.com/Algunenano)). +* Fix 01605_adaptive_granularity_block_borders. [#67605](https://github.com/ClickHouse/ClickHouse/pull/67605) ([Nikita Taranov](https://github.com/nickitat)). +* Update CHANGELOG.md. [#67607](https://github.com/ClickHouse/ClickHouse/pull/67607) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove some `no-parallel` tags from tests. [#67610](https://github.com/ClickHouse/ClickHouse/pull/67610) ([Raúl Marín](https://github.com/Algunenano)). +* Update README.md. [#67613](https://github.com/ClickHouse/ClickHouse/pull/67613) ([Tyler Hannan](https://github.com/tylerhannan)). +* Try fix 03143_asof_join_ddb_long. [#67620](https://github.com/ClickHouse/ClickHouse/pull/67620) ([Nikita Taranov](https://github.com/nickitat)). +* Don't run ASAN unit tests under gdb. [#67622](https://github.com/ClickHouse/ClickHouse/pull/67622) ([Raúl Marín](https://github.com/Algunenano)). +* Fix crash in KeyCondition::cloneASTWithInversionPushDown() caused by type change. [#67641](https://github.com/ClickHouse/ClickHouse/pull/67641) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix race condition between ProcessList and Settings. [#67645](https://github.com/ClickHouse/ClickHouse/pull/67645) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* `02481_async_insert_race_long.sh` flakiness fixes. [#67650](https://github.com/ClickHouse/ClickHouse/pull/67650) ([Julia Kartseva](https://github.com/jkartseva)). +* Fixes [#67651](https://github.com/ClickHouse/ClickHouse/issues/67651). [#67653](https://github.com/ClickHouse/ClickHouse/pull/67653) ([pufit](https://github.com/pufit)). +* Fix flaky `test_replicated_table_attach`. [#67658](https://github.com/ClickHouse/ClickHouse/pull/67658) ([Antonio Andelic](https://github.com/antonio2368)). +* Update version_date.tsv and changelogs after v24.4.4.113-stable. [#67659](https://github.com/ClickHouse/ClickHouse/pull/67659) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Even better healthcheck for ldap. [#67667](https://github.com/ClickHouse/ClickHouse/pull/67667) ([Andrey Zvonov](https://github.com/zvonand)). +* Fix 03203_client_benchmark_options. [#67671](https://github.com/ClickHouse/ClickHouse/pull/67671) ([vdimir](https://github.com/vdimir)). +* Integration tests: fix ports clashing problem. [#67672](https://github.com/ClickHouse/ClickHouse/pull/67672) ([Nikita Fomichev](https://github.com/fm4v)). +* Remove some `no-parallel` tags from tests (Part 2). [#67673](https://github.com/ClickHouse/ClickHouse/pull/67673) ([Raúl Marín](https://github.com/Algunenano)). +* Use FunctionArgumentDescriptors for bitSlice. [#67674](https://github.com/ClickHouse/ClickHouse/pull/67674) ([Lennard Eijsackers](https://github.com/Blokje5)). +* Update version_date.tsv and changelog after v24.3.6.48-lts. [#67677](https://github.com/ClickHouse/ClickHouse/pull/67677) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Avoid ddl queue timeout in 02313_filesystem_cache_seeks. [#67680](https://github.com/ClickHouse/ClickHouse/pull/67680) ([Nikita Taranov](https://github.com/nickitat)). +* Fix bad log message in sort description. [#67690](https://github.com/ClickHouse/ClickHouse/pull/67690) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Update version_date.tsv and changelog after v23.8.16.40-lts. [#67692](https://github.com/ClickHouse/ClickHouse/pull/67692) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix check names in test reports and the CI Logs database. [#67696](https://github.com/ClickHouse/ClickHouse/pull/67696) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* CI: Minor refactoring in ci_utils. [#67706](https://github.com/ClickHouse/ClickHouse/pull/67706) ([Max K.](https://github.com/maxknv)). +* Fix 01042_system_reload_dictionary_reloads_completely flakiness. [#67719](https://github.com/ClickHouse/ClickHouse/pull/67719) ([Azat Khuzhin](https://github.com/azat)). +* Fix test `00002_log_and_exception_messages_formatting`. [#67723](https://github.com/ClickHouse/ClickHouse/pull/67723) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test `02789_reading_from_s3_with_connection_pool`. [#67726](https://github.com/ClickHouse/ClickHouse/pull/67726) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix strange code in HostResolvePool. [#67727](https://github.com/ClickHouse/ClickHouse/pull/67727) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix a typo. [#67729](https://github.com/ClickHouse/ClickHouse/pull/67729) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Smart handling of processes leftovers in tests. [#67737](https://github.com/ClickHouse/ClickHouse/pull/67737) ([Azat Khuzhin](https://github.com/azat)). +* Fix test retries. [#67738](https://github.com/ClickHouse/ClickHouse/pull/67738) ([Azat Khuzhin](https://github.com/azat)). +* Fill only selected columns from system.clusters. [#67739](https://github.com/ClickHouse/ClickHouse/pull/67739) ([Azat Khuzhin](https://github.com/azat)). +* Bump NuRaft (to properly catch thread exceptions). [#67740](https://github.com/ClickHouse/ClickHouse/pull/67740) ([Azat Khuzhin](https://github.com/azat)). +* Try to fix RabbitMQ test failures. [#67743](https://github.com/ClickHouse/ClickHouse/pull/67743) ([Azat Khuzhin](https://github.com/azat)). +* Stateless tests: attempt to fix timeouts of `02473_multistep_prewhere* 00411_long_accurate_number_comparison*`. [#67746](https://github.com/ClickHouse/ClickHouse/pull/67746) ([Nikita Fomichev](https://github.com/fm4v)). +* Fix test_ttl_move::test_alter_with_merge_work flakiness. [#67747](https://github.com/ClickHouse/ClickHouse/pull/67747) ([Azat Khuzhin](https://github.com/azat)). +* ci: better stateless runner (correctly collect artifacts and also some basic errors capturing). [#67752](https://github.com/ClickHouse/ClickHouse/pull/67752) ([Azat Khuzhin](https://github.com/azat)). +* Introduce `no-flaky-check` tag. [#67755](https://github.com/ClickHouse/ClickHouse/pull/67755) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Whitespaces. [#67771](https://github.com/ClickHouse/ClickHouse/pull/67771) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* [RFC] Print original query for AST formatting check on CI. [#67776](https://github.com/ClickHouse/ClickHouse/pull/67776) ([Azat Khuzhin](https://github.com/azat)). +* Fix test `02833_concurrent_sessions`, Fix test `02835_drop_user_during_session`. [#67779](https://github.com/ClickHouse/ClickHouse/pull/67779) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix diagnostics in the test script. [#67780](https://github.com/ClickHouse/ClickHouse/pull/67780) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test `02231_bloom_filter_sizing`. [#67784](https://github.com/ClickHouse/ClickHouse/pull/67784) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fixed session_log related tests race condition on logout. [#67785](https://github.com/ClickHouse/ClickHouse/pull/67785) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* ci/stateless: fix artifacts post-processing and detect if something failed there. [#67791](https://github.com/ClickHouse/ClickHouse/pull/67791) ([Azat Khuzhin](https://github.com/azat)). +* Integration tests: fix flaky `test_dictionaries_update_and_reload::test_reload_after_fail_by_timer`. [#67793](https://github.com/ClickHouse/ClickHouse/pull/67793) ([Nikita Fomichev](https://github.com/fm4v)). +* Fix possible CANNOT_READ_ALL_DATA during server startup in performance tests. [#67795](https://github.com/ClickHouse/ClickHouse/pull/67795) ([Azat Khuzhin](https://github.com/azat)). +* Reduce table size in 03037_dynamic_merges_2* tests. [#67797](https://github.com/ClickHouse/ClickHouse/pull/67797) ([Kruglov Pavel](https://github.com/Avogar)). +* Disable 03038_nested_dynamic_merges* under sanitizers because it's too slow. [#67798](https://github.com/ClickHouse/ClickHouse/pull/67798) ([Kruglov Pavel](https://github.com/Avogar)). +* Revert "Merge pull request [#66510](https://github.com/ClickHouse/ClickHouse/issues/66510) from canhld94/fix_trivial_count_non_deterministic_func". [#67800](https://github.com/ClickHouse/ClickHouse/pull/67800) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Update comment. [#67801](https://github.com/ClickHouse/ClickHouse/pull/67801) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix bad test `03032_redundant_equals`. [#67822](https://github.com/ClickHouse/ClickHouse/pull/67822) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Update tags for a few tests. [#67829](https://github.com/ClickHouse/ClickHouse/pull/67829) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add debug logging for window view tests. [#67841](https://github.com/ClickHouse/ClickHouse/pull/67841) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Closes [#67621](https://github.com/ClickHouse/ClickHouse/issues/67621). [#67843](https://github.com/ClickHouse/ClickHouse/pull/67843) ([Ilya Yatsishin](https://github.com/qoega)). +* Fix query cache randomization in stress tests. [#67855](https://github.com/ClickHouse/ClickHouse/pull/67855) ([Azat Khuzhin](https://github.com/azat)). +* Update version_date.tsv and changelogs after v24.5.5.78-stable. [#67863](https://github.com/ClickHouse/ClickHouse/pull/67863) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Un-flake 02524_fuzz_and_fuss_2. [#67867](https://github.com/ClickHouse/ClickHouse/pull/67867) ([Robert Schulze](https://github.com/rschu1ze)). +* Misc fixes. [#67869](https://github.com/ClickHouse/ClickHouse/pull/67869) ([Alexey Katsman](https://github.com/alexkats)). +* Fixes [#67444](https://github.com/ClickHouse/ClickHouse/issues/67444). [#67873](https://github.com/ClickHouse/ClickHouse/pull/67873) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* no-msan 00314_sample_factor_virtual_column. [#67874](https://github.com/ClickHouse/ClickHouse/pull/67874) ([Michael Kolupaev](https://github.com/al13n321)). +* Revert "Revert "Add a test for [#47892](https://github.com/ClickHouse/ClickHouse/issues/47892)"". [#67877](https://github.com/ClickHouse/ClickHouse/pull/67877) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Lazily create invalid files in S3. [#67882](https://github.com/ClickHouse/ClickHouse/pull/67882) ([Antonio Andelic](https://github.com/antonio2368)). +* Do not try to create azure container if not needed. [#67896](https://github.com/ClickHouse/ClickHouse/pull/67896) ([Anton Popov](https://github.com/CurtizJ)). +* CI: Fix for setting Mergeable Check from sync. [#67898](https://github.com/ClickHouse/ClickHouse/pull/67898) ([Max K.](https://github.com/maxknv)). +* Bump rocksdb from v8.10 to v9.4 + enable jemalloc and liburing. [#67904](https://github.com/ClickHouse/ClickHouse/pull/67904) ([Robert Schulze](https://github.com/rschu1ze)). +* Update version_date.tsv and changelogs after v24.6.3.95-stable. [#67910](https://github.com/ClickHouse/ClickHouse/pull/67910) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Remove some no-parallel tags from tests (Part 3). [#67914](https://github.com/ClickHouse/ClickHouse/pull/67914) ([Raúl Marín](https://github.com/Algunenano)). +* Follow up [#67235](https://github.com/ClickHouse/ClickHouse/issues/67235). [#67917](https://github.com/ClickHouse/ClickHouse/pull/67917) ([Nikita Taranov](https://github.com/nickitat)). +* CI: Changelog: Critical Bug Fix to Bug Fix. [#67919](https://github.com/ClickHouse/ClickHouse/pull/67919) ([Max K.](https://github.com/maxknv)). +* CI: Multi-channel CiBuddy. [#67923](https://github.com/ClickHouse/ClickHouse/pull/67923) ([Max K.](https://github.com/maxknv)). +* more logs to debug logical error from async inserts. [#67928](https://github.com/ClickHouse/ClickHouse/pull/67928) ([Han Fei](https://github.com/hanfei1991)). +* Fix stress test error with TDigest statistics. [#67930](https://github.com/ClickHouse/ClickHouse/pull/67930) ([Robert Schulze](https://github.com/rschu1ze)). +* Remove some no-parallel tags from tests (Part 4). [#67932](https://github.com/ClickHouse/ClickHouse/pull/67932) ([Raúl Marín](https://github.com/Algunenano)). +* Upgrade QPL to v1.6.0. [#67933](https://github.com/ClickHouse/ClickHouse/pull/67933) ([Maria Zhukova](https://github.com/mzhukova)). +* CI: Strict job timeout 1.5h for tests, 2h for builds. [#67934](https://github.com/ClickHouse/ClickHouse/pull/67934) ([Max K.](https://github.com/maxknv)). +* Remove slow tests from fasttest check. [#67941](https://github.com/ClickHouse/ClickHouse/pull/67941) ([Raúl Marín](https://github.com/Algunenano)). +* Fix memory corruption in usearch. [#67942](https://github.com/ClickHouse/ClickHouse/pull/67942) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#68547](https://github.com/ClickHouse/ClickHouse/issues/68547): Timeout handling for functional and integration tests, store artifacts and report if timed out - sets 2h default timeout for all jobs. [#67944](https://github.com/ClickHouse/ClickHouse/pull/67944) ([Max K.](https://github.com/maxknv)). +* Unflake 02099_tsv_raw_format.sh. [#67947](https://github.com/ClickHouse/ClickHouse/pull/67947) ([Robert Schulze](https://github.com/rschu1ze)). +* This closes: [#67866](https://github.com/ClickHouse/ClickHouse/issues/67866). [#67950](https://github.com/ClickHouse/ClickHouse/pull/67950) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Change log level of an insignificant message in clickhouse-local. [#67952](https://github.com/ClickHouse/ClickHouse/pull/67952) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. [#67953](https://github.com/ClickHouse/ClickHouse/pull/67953) ([pufit](https://github.com/pufit)). +* Fix flaky `test_storage_s3_queue/test.py::test_multiple_tables_streaming_sync_distributed`. [#67959](https://github.com/ClickHouse/ClickHouse/pull/67959) ([Julia Kartseva](https://github.com/jkartseva)). +* tests: fix 03002_part_log_rmt_fetch_merge_error flakiness. [#67960](https://github.com/ClickHouse/ClickHouse/pull/67960) ([Azat Khuzhin](https://github.com/azat)). +* Fix timeout of 02310_clickhouse_local_INSERT_progress_profile_events. [#67961](https://github.com/ClickHouse/ClickHouse/pull/67961) ([Robert Schulze](https://github.com/rschu1ze)). +* Remove obsolete `--multiquery` parameter (follow-up to [#63898](https://github.com/ClickHouse/ClickHouse/issues/63898)), pt. III. [#67964](https://github.com/ClickHouse/ClickHouse/pull/67964) ([Robert Schulze](https://github.com/rschu1ze)). +* Update minio in stateless tests. [#67975](https://github.com/ClickHouse/ClickHouse/pull/67975) ([Antonio Andelic](https://github.com/antonio2368)). +* CI: Integration tests uncover some logging. [#67978](https://github.com/ClickHouse/ClickHouse/pull/67978) ([Max K.](https://github.com/maxknv)). +* Fix 03130_convert_outer_join_to_inner_join. [#67980](https://github.com/ClickHouse/ClickHouse/pull/67980) ([vdimir](https://github.com/vdimir)). +* Collect minio audit logs in stateless tests. [#67998](https://github.com/ClickHouse/ClickHouse/pull/67998) ([Antonio Andelic](https://github.com/antonio2368)). +* Remove some no-parallel tags from tests (Part 5). [#68002](https://github.com/ClickHouse/ClickHouse/pull/68002) ([Raúl Marín](https://github.com/Algunenano)). +* Minor fixes in tables.md. [#68004](https://github.com/ClickHouse/ClickHouse/pull/68004) ([Ilya Yatsishin](https://github.com/qoega)). +* Follow up for [#67843](https://github.com/ClickHouse/ClickHouse/issues/67843). [#68007](https://github.com/ClickHouse/ClickHouse/pull/68007) ([Ilya Yatsishin](https://github.com/qoega)). +* Remove unused CLI option. [#68008](https://github.com/ClickHouse/ClickHouse/pull/68008) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test `02845_threads_count_in_distributed_queries`. [#68011](https://github.com/ClickHouse/ClickHouse/pull/68011) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* CI: Pass job timeout into tests config. [#68013](https://github.com/ClickHouse/ClickHouse/pull/68013) ([Nikita Fomichev](https://github.com/fm4v)). +* Add a test for [#57420](https://github.com/ClickHouse/ClickHouse/issues/57420). [#68017](https://github.com/ClickHouse/ClickHouse/pull/68017) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Revert "Revert "Bump rocksdb from v8.10 to v9.4 + enable jemalloc and liburing"". [#68021](https://github.com/ClickHouse/ClickHouse/pull/68021) ([Robert Schulze](https://github.com/rschu1ze)). +* CI: Fix for filtering jobs in PRs. [#68022](https://github.com/ClickHouse/ClickHouse/pull/68022) ([Max K.](https://github.com/maxknv)). +* Docs: Update 3rd party library guide. [#68027](https://github.com/ClickHouse/ClickHouse/pull/68027) ([Robert Schulze](https://github.com/rschu1ze)). +* Refactor tests for (experimental) statistics. [#68034](https://github.com/ClickHouse/ClickHouse/pull/68034) ([Robert Schulze](https://github.com/rschu1ze)). +* Split `00284_external_aggregation.sql`. [#68037](https://github.com/ClickHouse/ClickHouse/pull/68037) ([Robert Schulze](https://github.com/rschu1ze)). +* Update version_date.tsv and changelog after v24.7.3.42-stable. [#68045](https://github.com/ClickHouse/ClickHouse/pull/68045) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update test_drop_is_lock_free/test.py. [#68051](https://github.com/ClickHouse/ClickHouse/pull/68051) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fixes [#67865](https://github.com/ClickHouse/ClickHouse/issues/67865). [#68054](https://github.com/ClickHouse/ClickHouse/pull/68054) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Disable randomization of `trace_profile_events` in clickhouse-test. [#68058](https://github.com/ClickHouse/ClickHouse/pull/68058) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Minor CMake cleanup. [#68069](https://github.com/ClickHouse/ClickHouse/pull/68069) ([Robert Schulze](https://github.com/rschu1ze)). +* If the test cluster is overloaded, sometimes simple query execution [can take more time](https://pastila.nl/?00224e71/f017cd6675b52ccc205c81aa62a47de5#8dB4+C4MOdOi3NLV1dc0Fg==) than `Buffer`'s max time to flush. This PR doubles the timeout and allows to skip the check in case of significant latency. [#68072](https://github.com/ClickHouse/ClickHouse/pull/68072) ([pufit](https://github.com/pufit)). +* Fix flaky `02675_profile_events_from_query_log_and_client`. [#68097](https://github.com/ClickHouse/ClickHouse/pull/68097) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix race in `WithRetries`. [#68106](https://github.com/ClickHouse/ClickHouse/pull/68106) ([Antonio Andelic](https://github.com/antonio2368)). +* Add empty cell to reports when time is missing. [#68112](https://github.com/ClickHouse/ClickHouse/pull/68112) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fix test `00900_long_parquet_load`. [#68130](https://github.com/ClickHouse/ClickHouse/pull/68130) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* tests: fix 01246_buffer_flush flakiness due to slow trace_log flush. [#68134](https://github.com/ClickHouse/ClickHouse/pull/68134) ([Azat Khuzhin](https://github.com/azat)). +* Only use Field::safeGet - Field::get prone to type punning. [#68135](https://github.com/ClickHouse/ClickHouse/pull/68135) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* tests: attempt to fix 01600_parts_states_metrics_long (by forbid parallel run). [#68136](https://github.com/ClickHouse/ClickHouse/pull/68136) ([Azat Khuzhin](https://github.com/azat)). +* Fix01710 Timeout. [#68138](https://github.com/ClickHouse/ClickHouse/pull/68138) ([jsc0218](https://github.com/jsc0218)). +* Remove the extra cell from reports when it is not necessary. [#68145](https://github.com/ClickHouse/ClickHouse/pull/68145) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Remove "Processing configuration file" message from clickhouse-local. [#68157](https://github.com/ClickHouse/ClickHouse/pull/68157) ([Azat Khuzhin](https://github.com/azat)). +* tests: fix 02122_join_group_by_timeout flakiness. [#68160](https://github.com/ClickHouse/ClickHouse/pull/68160) ([Azat Khuzhin](https://github.com/azat)). +* Fix `test_cluster_all_replicas`. [#68178](https://github.com/ClickHouse/ClickHouse/pull/68178) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix leftovers. [#68181](https://github.com/ClickHouse/ClickHouse/pull/68181) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test `01172_transaction_counters`. [#68182](https://github.com/ClickHouse/ClickHouse/pull/68182) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Refactor tests for (experimental) statistics. [#68186](https://github.com/ClickHouse/ClickHouse/pull/68186) ([Robert Schulze](https://github.com/rschu1ze)). +* Remove Log engine from Kafka integration tests. [#68200](https://github.com/ClickHouse/ClickHouse/pull/68200) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* [Green CI] Fix test 01903_correct_block_size_prediction_with_default. [#68203](https://github.com/ClickHouse/ClickHouse/pull/68203) ([Pablo Marcos](https://github.com/pamarcos)). +* Replace segfault in Replicated database with logical error. [#68250](https://github.com/ClickHouse/ClickHouse/pull/68250) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#68423](https://github.com/ClickHouse/ClickHouse/issues/68423): tests: make 01600_parts_states_metrics_long better. [#68265](https://github.com/ClickHouse/ClickHouse/pull/68265) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#68374](https://github.com/ClickHouse/ClickHouse/issues/68374): Rename: S3DiskNoKeyErrors -> DiskS3NoSuchKeyErrors. [#68361](https://github.com/ClickHouse/ClickHouse/pull/68361) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)). +* Backported in [#68637](https://github.com/ClickHouse/ClickHouse/issues/68637): Check for invalid regexp in JSON SKIP REGEXP section. [#68451](https://github.com/ClickHouse/ClickHouse/pull/68451) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#68485](https://github.com/ClickHouse/ClickHouse/issues/68485): Better inference of date times 2. [#68452](https://github.com/ClickHouse/ClickHouse/pull/68452) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#68540](https://github.com/ClickHouse/ClickHouse/issues/68540): CI: Native build for package_aarch64. [#68457](https://github.com/ClickHouse/ClickHouse/pull/68457) ([Max K.](https://github.com/maxknv)). +* Backported in [#68518](https://github.com/ClickHouse/ClickHouse/issues/68518): Minor update in Dynamic/JSON serializations. [#68459](https://github.com/ClickHouse/ClickHouse/pull/68459) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#68558](https://github.com/ClickHouse/ClickHouse/issues/68558): CI: Minor release workflow fix. [#68536](https://github.com/ClickHouse/ClickHouse/pull/68536) ([Max K.](https://github.com/maxknv)). +* Backported in [#68576](https://github.com/ClickHouse/ClickHouse/issues/68576): CI: Tidy build timeout from 2h to 3h. [#68567](https://github.com/ClickHouse/ClickHouse/pull/68567) ([Max K.](https://github.com/maxknv)). + diff --git a/docs/changelogs/v24.8.2.3-lts.md b/docs/changelogs/v24.8.2.3-lts.md new file mode 100644 index 00000000000..69dfc9961a2 --- /dev/null +++ b/docs/changelogs/v24.8.2.3-lts.md @@ -0,0 +1,12 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.8.2.3-lts (b54f79ed323) FIXME as compared to v24.8.1.2684-lts (161c62fd295) + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Backported in [#68670](https://github.com/ClickHouse/ClickHouse/issues/68670): Fix `LOGICAL_ERROR`s when functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. [#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)). + diff --git a/docs/en/development/tests.md b/docs/en/development/tests.md index 269995a1a96..4cc7563135a 100644 --- a/docs/en/development/tests.md +++ b/docs/en/development/tests.md @@ -14,7 +14,7 @@ Each functional test sends one or multiple queries to the running ClickHouse ser Tests are located in `queries` directory. There are two subdirectories: `stateless` and `stateful`. Stateless tests run queries without any preloaded test data - they often create small synthetic datasets on the fly, within the test itself. Stateful tests require preloaded test data from ClickHouse and it is available to general public. -Each test can be one of two types: `.sql` and `.sh`. `.sql` test is the simple SQL script that is piped to `clickhouse-client --multiquery`. `.sh` test is a script that is run by itself. SQL tests are generally preferable to `.sh` tests. You should use `.sh` tests only when you have to test some feature that cannot be exercised from pure SQL, such as piping some input data into `clickhouse-client` or testing `clickhouse-local`. +Each test can be one of two types: `.sql` and `.sh`. `.sql` test is the simple SQL script that is piped to `clickhouse-client`. `.sh` test is a script that is run by itself. SQL tests are generally preferable to `.sh` tests. You should use `.sh` tests only when you have to test some feature that cannot be exercised from pure SQL, such as piping some input data into `clickhouse-client` or testing `clickhouse-local`. :::note A common mistake when testing data types `DateTime` and `DateTime64` is assuming that the server uses a specific time zone (e.g. "UTC"). This is not the case, time zones in CI test runs @@ -38,7 +38,7 @@ For more options, see `tests/clickhouse-test --help`. You can simply run all tes ### Adding a New Test -To add new test, create a `.sql` or `.sh` file in `queries/0_stateless` directory, check it manually and then generate `.reference` file in the following way: `clickhouse-client --multiquery < 00000_test.sql > 00000_test.reference` or `./00000_test.sh > ./00000_test.reference`. +To add new test, create a `.sql` or `.sh` file in `queries/0_stateless` directory, check it manually and then generate `.reference` file in the following way: `clickhouse-client < 00000_test.sql > 00000_test.reference` or `./00000_test.sh > ./00000_test.reference`. Tests should use (create, drop, etc) only tables in `test` database that is assumed to be created beforehand; also tests can use temporary tables. @@ -91,6 +91,28 @@ SELECT 1 In addition to the above settings, you can use `USE_*` flags from `system.build_options` to define usage of particular ClickHouse features. For example, if your test uses a MySQL table, you should add a tag `use-mysql`. +### Specifying limits for random settings + +A test can specify minimum and maximum allowed values for settings that can be randomized during test run. + +For `.sh` tests limits are written as a comment on the line next to tags or on the second line if no tags are specified: + +```bash +#!/usr/bin/env bash +# Tags: no-fasttest +# Random settings limits: max_block_size=(1000, 10000); index_granularity=(100, None) +``` + +For `.sql` tests tags are placed as a SQL comment in the line next to tags or in the first line: + +```sql +-- Tags: no-fasttest +-- Random settings limits: max_block_size=(1000, 10000); index_granularity=(100, None) +SELECT 1 +``` + +If you need to specify only one limit, you can use `None` for another one. + ### Choosing the Test Name The name of the test starts with a five-digit prefix followed by a descriptive name, such as `00422_hash_function_constexpr.sql`. To choose the prefix, find the largest prefix already present in the directory, and increment it by one. In the meantime, some other tests might be added with the same numeric prefix, but this is OK and does not lead to any problems, you don't have to change it later. diff --git a/docs/en/engines/table-engines/index.md b/docs/en/engines/table-engines/index.md index 5e81eacc937..20c7c511aa9 100644 --- a/docs/en/engines/table-engines/index.md +++ b/docs/en/engines/table-engines/index.md @@ -61,6 +61,7 @@ Engines in the family: - [RabbitMQ](../../engines/table-engines/integrations/rabbitmq.md) - [PostgreSQL](../../engines/table-engines/integrations/postgresql.md) - [S3Queue](../../engines/table-engines/integrations/s3queue.md) +- [TimeSeries](../../engines/table-engines/integrations/time-series.md) ### Special Engines {#special-engines} diff --git a/docs/en/engines/table-engines/integrations/hdfs.md b/docs/en/engines/table-engines/integrations/hdfs.md index c9df713231a..404cec97def 100644 --- a/docs/en/engines/table-engines/integrations/hdfs.md +++ b/docs/en/engines/table-engines/integrations/hdfs.md @@ -240,7 +240,7 @@ libhdfs3 support HDFS namenode HA. ## Storage Settings {#storage-settings} - [hdfs_truncate_on_insert](/docs/en/operations/settings/settings.md#hdfs_truncate_on_insert) - allows to truncate file before insert into it. Disabled by default. -- [hdfs_create_multiple_files](/docs/en/operations/settings/settings.md#hdfs_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default. +- [hdfs_create_new_file_on_insert](/docs/en/operations/settings/settings.md#hdfs_create_new_file_on_insert) - allows to create a new file on each insert if format has suffix. Disabled by default. - [hdfs_skip_empty_files](/docs/en/operations/settings/settings.md#hdfs_skip_empty_files) - allows to skip empty files while reading. Disabled by default. **See Also** diff --git a/docs/en/engines/table-engines/integrations/kafka.md b/docs/en/engines/table-engines/integrations/kafka.md index 7bdc856c9fd..de6492e8ea7 100644 --- a/docs/en/engines/table-engines/integrations/kafka.md +++ b/docs/en/engines/table-engines/integrations/kafka.md @@ -251,6 +251,44 @@ The number of rows in one Kafka message depends on whether the format is row-bas - For row-based formats the number of rows in one Kafka message can be controlled by setting `kafka_max_rows_per_message`. - For block-based formats we cannot divide block into smaller parts, but the number of rows in one block can be controlled by general setting [max_block_size](../../../operations/settings/settings.md#setting-max_block_size). +## Experimental engine to store committed offsets in ClickHouse Keeper + +If `allow_experimental_kafka_offsets_storage_in_keeper` is enabled, then two more settings can be specified to the Kafka table engine: + - `kafka_keeper_path` specifies the path to the table in ClickHouse Keeper + - `kafka_replica_name` specifies the replica name in ClickHouse Keeper + +Either both of the settings must be specified or neither of them. When both of them are specified, then a new, experimental Kafka engine will be used. The new engine doesn't depend on storing the committed offsets in Kafka, but stores them in ClickHouse Keeper. It still tries to commit the offsets to Kafka, but it only depends on those offsets when the table is created. In any other circumstances (table is restarted, or recovered after some error) the offsets stored in ClickHouse Keeper will be used as an offset to continue consuming messages from. Apart from the committed offset, it also stores how many messages were consumed in the last batch, so if the insert fails, the same amount of messages will be consumed, thus enabling deduplication if necessary. + +Example: + +``` sql +CREATE TABLE experimental_kafka (key UInt64, value UInt64) +ENGINE = Kafka('localhost:19092', 'my-topic', 'my-consumer', 'JSONEachRow') +SETTINGS + kafka_keeper_path = '/clickhouse/{database}/experimental_kafka', + kafka_replica_name = 'r1' +SETTINGS allow_experimental_kafka_offsets_storage_in_keeper=1; +``` + +Or to utilize the `uuid` and `replica` macros similarly to ReplicatedMergeTree: + +``` sql +CREATE TABLE experimental_kafka (key UInt64, value UInt64) +ENGINE = Kafka('localhost:19092', 'my-topic', 'my-consumer', 'JSONEachRow') +SETTINGS + kafka_keeper_path = '/clickhouse/{database}/{uuid}', + kafka_replica_name = '{replica}' +SETTINGS allow_experimental_kafka_offsets_storage_in_keeper=1; +``` + +### Known limitations + +As the new engine is experimental, it is not production ready yet. There are few known limitations of the implementation: + - The biggest limitation is the engine doesn't support direct reading. Reading from the engine using materialized views and writing to the engine work, but direct reading doesn't. As a result, all direct `SELECT` queries will fail. + - Rapidly dropping and recreating the table or specifying the same ClickHouse Keeper path to different engines might cause issues. As best practice you can use the `{uuid}` in `kafka_keeper_path` to avoid clashing paths. + - To make repeatable reads, messages cannot be consumed from multiple partitions on a single thread. On the other hand, the Kafka consumers have to be polled regularly to keep them alive. As a result of these two objectives, we decided to only allow creating multiple consumers if `kafka_thread_per_consumer` is enabled, otherwise it is too complicated to avoid issues regarding polling consumers regularly. + - Consumers created by the new storage engine do not show up in [`system.kafka_consumers`](../../../operations/system-tables/kafka_consumers.md) table. + **See Also** - [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns) diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index d664c37bd0f..48a08dfa499 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -225,7 +225,7 @@ CREATE TABLE table_with_asterisk (name String, value UInt32) ## Storage Settings {#storage-settings} - [s3_truncate_on_insert](/docs/en/operations/settings/settings.md#s3_truncate_on_insert) - allows to truncate file before insert into it. Disabled by default. -- [s3_create_multiple_files](/docs/en/operations/settings/settings.md#s3_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default. +- [s3_create_new_file_on_insert](/docs/en/operations/settings/settings.md#s3_create_new_file_on_insert) - allows to create a new file on each insert if format has suffix. Disabled by default. - [s3_skip_empty_files](/docs/en/operations/settings/settings.md#s3_skip_empty_files) - allows to skip empty files while reading. Disabled by default. ## S3-related Settings {#settings} diff --git a/docs/en/engines/table-engines/integrations/time-series.md b/docs/en/engines/table-engines/integrations/time-series.md new file mode 100644 index 00000000000..4830fd61d27 --- /dev/null +++ b/docs/en/engines/table-engines/integrations/time-series.md @@ -0,0 +1,295 @@ +--- +slug: /en/engines/table-engines/special/time_series +sidebar_position: 60 +sidebar_label: TimeSeries +--- + +# TimeSeries Engine [Experimental] + +A table engine storing time series, i.e. a set of values associated with timestamps and tags (or labels): + +``` +metric_name1[tag1=value1, tag2=value2, ...] = {timestamp1: value1, timestamp2: value2, ...} +metric_name2[...] = ... +``` + +:::info +This is an experimental feature that may change in backwards-incompatible ways in the future releases. +Enable usage of the TimeSeries table engine +with [allow_experimental_time_series_table](../../../operations/settings/settings.md#allow-experimental-time-series-table) setting. +Input the command `set allow_experimental_time_series_table = 1`. +::: + +## Syntax {#syntax} + +``` sql +CREATE TABLE name [(columns)] ENGINE=TimeSeries +[SETTINGS var1=value1, ...] +[DATA db.data_table_name | DATA ENGINE data_table_engine(arguments)] +[TAGS db.tags_table_name | TAGS ENGINE tags_table_engine(arguments)] +[METRICS db.metrics_table_name | METRICS ENGINE metrics_table_engine(arguments)] +``` + +## Usage {#usage} + +It's easier to start with everything set by default (it's allowed to create a `TimeSeries` table without specifying a list of columns): + +``` sql +CREATE TABLE my_table ENGINE=TimeSeries +``` + +Then this table can be used with the following protocols (a port must be assigned in the server configuration): +- [prometheus remote-write](../../../interfaces/prometheus.md#remote-write) +- [prometheus remote-read](../../../interfaces/prometheus.md#remote-read) + +## Target tables {#target-tables} + +A `TimeSeries` table doesn't have its own data, everything is stored in its target tables. +This is similar to how a [materialized view](../../../sql-reference/statements/create/view#materialized-view) works, +with the difference that a materialized view has one target table +whereas a `TimeSeries` table has three target tables named [data]{#data-table}, [tags]{#tags-table], and [metrics]{#metrics-table}. + +The target tables can be either specified explicitly in the `CREATE TABLE` query +or the `TimeSeries` table engine can generate inner target tables automatically. + +The target tables are the following: +1. The _data_ table {#data-table} contains time series associated with some identifier. +The _data_ table must have columns: + +| Name | Mandatory? | Default type | Possible types | Description | +|---|---|---|---|---| +| `id` | [x] | `UUID` | any | Identifies a combination of a metric names and tags | +| `timestamp` | [x] | `DateTime64(3)` | `DateTime64(X)` | A time point | +| `value` | [x] | `Float64` | `Float32` or `Float64` | A value associated with the `timestamp` | + +2. The _tags_ table {#tags-table} contains identifiers calculated for each combination of a metric name and tags. +The _tags_ table must have columns: + +| Name | Mandatory? | Default type | Possible types | Description | +|---|---|---|---|---| +| `id` | [x] | `UUID` | any (must match the type of `id` in the [data]{#data-table} table) | An `id` identifies a combination of a metric name and tags. The DEFAULT expression specifies how to calculate such an identifier | +| `metric_name` | [x] | `LowCardinality(String)` | `String` or `LowCardinality(String)` | The name of a metric | +| `` | [ ] | `String` | `String` or `LowCardinality(String)` or `LowCardinality(Nullable(String))` | The value of a specific tag, the tag's name and the name of a corresponding column are specified in the [tags_to_columns](#settings) setting | +| `tags` | [x] | `Map(LowCardinality(String), String)` | `Map(String, String)` or `Map(LowCardinality(String), String)` or `Map(LowCardinality(String), LowCardinality(String))` | Map of tags excluding the tag `__name__` containing the name of a metric and excluding tags with names enumerated in the [tags_to_columns](#settings) setting | +| `all_tags` | [ ] | `Map(String, String)` | `Map(String, String)` or `Map(LowCardinality(String), String)` or `Map(LowCardinality(String), LowCardinality(String))` | Ephemeral column, each row is a map of all the tags excluding only the tag `__name__` containing the name of a metric. The only purpose of that column is to be used while calculating `id` | +| `min_time` | [ ] | `Nullable(DateTime64(3))` | `DateTime64(X)` or `Nullable(DateTime64(X))` | Minimum timestamp of time series with that `id`. The column is created if [store_min_time_and_max_time](#settings) is `true` | +| `max_time` | [ ] | `Nullable(DateTime64(3))` | `DateTime64(X)` or `Nullable(DateTime64(X))` | Maximum timestamp of time series with that `id`. The column is created if [store_min_time_and_max_time](#settings) is `true` | + +3. The _metrics_ table {#metrics-table} contains some information about metrics been collected, the types of those metrics and their descriptions. +The _metrics_ table must have columns: + +| Name | Mandatory? | Default type | Possible types | Description | +|---|---|---|---|---| +| `metric_family_name` | [x] | `String` | `String` or `LowCardinality(String)` | The name of a metric family | +| `type` | [x] | `String` | `String` or `LowCardinality(String)` | The type of a metric family, one of "counter", "gauge", "summary", "stateset", "histogram", "gaugehistogram" | +| `unit` | [x] | `String` | `String` or `LowCardinality(String)` | The unit used in a metric | +| `help` | [x] | `String` | `String` or `LowCardinality(String)` | The description of a metric | + +Any row inserted into a `TimeSeries` table will be in fact stored in those three target tables. +A `TimeSeries` table contains all those columns from the [data]{#data-table}, [tags]{#tags-table}, [metrics]{#metrics-table} tables. + +## Creation {#creation} + +There are multiple ways to create a table with the `TimeSeries` table engine. +The simplest statement + +``` sql +CREATE TABLE my_table ENGINE=TimeSeries +``` + +will actually create the following table (you can see that by executing `SHOW CREATE TABLE my_table`): + +``` sql +CREATE TABLE my_table +( + `id` UUID DEFAULT reinterpretAsUUID(sipHash128(metric_name, all_tags)), + `timestamp` DateTime64(3), + `value` Float64, + `metric_name` LowCardinality(String), + `tags` Map(LowCardinality(String), String), + `all_tags` Map(String, String), + `min_time` Nullable(DateTime64(3)), + `max_time` Nullable(DateTime64(3)), + `metric_family_name` String, + `type` String, + `unit` String, + `help` String +) +ENGINE = TimeSeries +DATA ENGINE = MergeTree ORDER BY (id, timestamp) +DATA INNER UUID '01234567-89ab-cdef-0123-456789abcdef' +TAGS ENGINE = AggregatingMergeTree PRIMARY KEY metric_name ORDER BY (metric_name, id) +TAGS INNER UUID '01234567-89ab-cdef-0123-456789abcdef' +METRICS ENGINE = ReplacingMergeTree ORDER BY metric_family_name +METRICS INNER UUID '01234567-89ab-cdef-0123-456789abcdef' +``` + +So the columns were generated automatically and also there are three inner UUIDs in this statement - +one per each inner target table that was created. +(Inner UUIDs are not shown normally until setting +[show_table_uuid_in_table_create_query_if_not_nil](../../../operations/settings/settings#show_table_uuid_in_table_create_query_if_not_nil) +is set.) + +Inner target tables have names like `.inner_id.data.xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`, +`.inner_id.tags.xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`, `.inner_id.metrics.xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx` +and each target table has columns which is a subset of the columns of the main `TimeSeries` table: + +``` sql +CREATE TABLE default.`.inner_id.data.xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx` +( + `id` UUID, + `timestamp` DateTime64(3), + `value` Float64 +) +ENGINE = MergeTree +ORDER BY (id, timestamp) +``` + +``` sql +CREATE TABLE default.`.inner_id.tags.xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx` +( + `id` UUID DEFAULT reinterpretAsUUID(sipHash128(metric_name, all_tags)), + `metric_name` LowCardinality(String), + `tags` Map(LowCardinality(String), String), + `all_tags` Map(String, String) EPHEMERAL, + `min_time` SimpleAggregateFunction(min, Nullable(DateTime64(3))), + `max_time` SimpleAggregateFunction(max, Nullable(DateTime64(3))) +) +ENGINE = AggregatingMergeTree +PRIMARY KEY metric_name +ORDER BY (metric_name, id) +``` + +``` sql +CREATE TABLE default.`.inner_id.metrics.xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx` +( + `metric_family_name` String, + `type` String, + `unit` String, + `help` String +) +ENGINE = ReplacingMergeTree +ORDER BY metric_family_name +``` + +## Adjusting types of columns {#adjusting-column-types} + +You can adjust the types of almost any column of the inner target tables by specifying them explicitly +while defining the main table. For example, + +``` sql +CREATE TABLE my_table +( + timestamp DateTime64(6) +) ENGINE=TimeSeries +``` + +will make the inner [data]{#data-table} table store timestamp in microseconds instead of milliseconds: + +``` sql +CREATE TABLE default.`.inner_id.data.xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx` +( + `id` UUID, + `timestamp` DateTime64(6), + `value` Float64 +) +ENGINE = MergeTree +ORDER BY (id, timestamp) +``` + +## The `id` column {#id-column} + +The `id` column contains identifiers, every identifier is calculated for a combination of a metric name and tags. +The DEFAULT expression for the `id` column is an expression which will be used to calculate such identifiers. +Both the type of the `id` column and that expression can be adjusted by specifying them explicitly: + +``` sql +CREATE TABLE my_table +( + id UInt64 DEFAULT sipHash64(metric_name, all_tags) +) ENGINE=TimeSeries +``` + +## The `tags` and `all_tags` columns {#tags-and-all-tags} + +There are two columns containing maps of tags - `tags` and `all_tags`. In this example they mean the same, however they can be different +if setting `tags_to_columns` is used. This setting allows to specify that a specific tag should be stored in a separate column instead of storing +in a map inside the `tags` column: + +``` sql +CREATE TABLE my_table ENGINE=TimeSeries SETTINGS = {'instance': 'instance', 'job': 'job'} +``` + +This statement will add columns +``` + `instance` String, + `job` String +``` +to the definition of both `my_table` and its inner [tags]{#tags-table} target table. In this case the `tags` column will not contain tags `instance` and `job`, +but the `all_tags` column will contain them. The `all_tags` column is ephemeral and its only purpose to be used in the DEFAULT expression +for the `id` column. + +The types of columns can be adjusted by specifying them explicitly: + +``` sql +CREATE TABLE my_table (instance LowCardinality(String), job LowCardinality(Nullable(String))) +ENGINE=TimeSeries SETTINGS = {'instance': 'instance', 'job': 'job'} +``` + +## Table engines of inner target tables {#inner-table-engines} + +By default inner target tables use the following table engines: +- the [data]{#data-table} table uses [MergeTree](../mergetree-family/mergetree); +- the [tags]{#tags-table} table uses [AggregatingMergeTree](../mergetree-family/aggregatingmergetree) because the same data is often inserted multiple times to this table so we need a way +to remove duplicates, and also because it's required to do aggregation for columns `min_time` and `max_time`; +- the [metrics]{#metrics-table} table uses [ReplacingMergeTree](../mergetree-family/replacingmergetree) because the same data is often inserted multiple times to this table so we need a way +to remove duplicates. + +Other table engines also can be used for inner target tables if it's specified so: + +``` sql +CREATE TABLE my_table ENGINE=TimeSeries +DATA ENGINE=ReplicatedMergeTree +TAGS ENGINE=ReplicatedAggregatingMergeTree +METRICS ENGINE=ReplicatedReplacingMergeTree +``` + +## External target tables {#external-target-tables} + +It's possible to make a `TimeSeries` table use a manually created table: + +``` sql +CREATE TABLE data_for_my_table +( + `id` UUID, + `timestamp` DateTime64(3), + `value` Float64 +) +ENGINE = MergeTree +ORDER BY (id, timestamp); + +CREATE TABLE tags_for_my_table ... + +CREATE TABLE metrics_for_my_table ... + +CREATE TABLE my_table ENGINE=TimeSeries DATA data_for_my_table TAGS tags_for_my_table METRICS metrics_for_my_table; +``` + +## Settings {#settings} + +Here is a list of settings which can be specified while defining a `TimeSeries` table: + +| Name | Type | Default | Description | +|---|---|---|---| +| `tags_to_columns` | Map | {} | Map specifying which tags should be put to separate columns in the [tags]{#tags-table} table. Syntax: `{'tag1': 'column1', 'tag2' : column2, ...}` | +| `use_all_tags_column_to_generate_id` | Bool | true | When generating an expression to calculate an identifier of a time series, this flag enables using the `all_tags` column in that calculation | +| `store_min_time_and_max_time` | Bool | true | If set to true then the table will store `min_time` and `max_time` for each time series | +| `aggregate_min_time_and_max_time` | Bool | true | When creating an inner target `tags` table, this flag enables using `SimpleAggregateFunction(min, Nullable(DateTime64(3)))` instead of just `Nullable(DateTime64(3))` as the type of the `min_time` column, and the same for the `max_time` column | +| `filter_by_min_time_and_max_time` | Bool | true | If set to true then the table will use the `min_time` and `max_time` columns for filtering time series | + +# Functions {#functions} + +Here is a list of functions supporting a `TimeSeries` table as an argument: +- [timeSeriesData](../../../sql-reference/table-functions/timeSeriesData.md) +- [timeSeriesTags](../../../sql-reference/table-functions/timeSeriesTags.md) +- [timeSeriesMetrics](../../../sql-reference/table-functions/timeSeriesMetrics.md) diff --git a/docs/en/engines/table-engines/mergetree-family/annindexes.md b/docs/en/engines/table-engines/mergetree-family/annindexes.md index 5a81313f62e..1057ccb5fee 100644 --- a/docs/en/engines/table-engines/mergetree-family/annindexes.md +++ b/docs/en/engines/table-engines/mergetree-family/annindexes.md @@ -17,85 +17,121 @@ In terms of SQL, the nearest neighborhood problem can be expressed as follows: ``` sql SELECT * -FROM table_with_ann_index +FROM table ORDER BY Distance(vectors, Point) LIMIT N ``` -`vectors` contains N-dimensional values of type [Array(Float32)](../../../sql-reference/data-types/array.md), for example embeddings. -Function `Distance` computes the distance between two vectors. Often, the Euclidean (L2) distance is chosen as distance function but [other -distance functions](/docs/en/sql-reference/functions/distance-functions.md) are also possible. `Point` is the reference point, e.g. `(0.17, -0.33, ...)`, and `N` limits the number of search results. +`vectors` contains N-dimensional values of type [Array(Float32)](../../../sql-reference/data-types/array.md) or Array(Float64), for example +embeddings. Function `Distance` computes the distance between two vectors. Often, the Euclidean (L2) distance is chosen as distance function +but [other distance functions](/docs/en/sql-reference/functions/distance-functions.md) are also possible. `Point` is the reference point, +e.g. `(0.17, 0.33, ...)`, and `N` limits the number of search results. -An alternative formulation of the nearest neighborhood search problem looks as follows: +This query returns the top-`N` closest points to the reference point. Parameter `N` limits the number of returned values which is useful for +situations where `MaxDistance` is difficult to determine in advance. -``` sql -SELECT * -FROM table_with_ann_index -WHERE Distance(vectors, Point) < MaxDistance -LIMIT N -``` - -While the first query returns the top-`N` closest points to the reference point, the second query returns all points closer to the reference -point than a maximally allowed radius `MaxDistance`. Parameter `N` limits the number of returned values which is useful for situations where -`MaxDistance` is difficult to determine in advance. - -With brute force search, both queries are expensive (linear in the number of points) because the distance between all points in `vectors` and +With brute force search, the query is expensive (linear in the number of points) because the distance between all points in `vectors` and `Point` must be computed. To speed this process up, Approximate Nearest Neighbor Search Indexes (ANN indexes) store a compact representation of the search space (using clustering, search trees, etc.) which allows to compute an approximate answer much quicker (in sub-linear time). -# Creating and Using ANN Indexes {#creating_using_ann_indexes} +# Creating and Using Vector Similarity Indexes -Syntax to create an ANN index over an [Array(Float32)](../../../sql-reference/data-types/array.md) column: +Syntax to create a vector similarity index over an [Array(Float32)](../../../sql-reference/data-types/array.md) column: ```sql -CREATE TABLE table_with_ann_index +CREATE TABLE table ( - `id` Int64, - `vectors` Array(Float32), - INDEX [ann_index_name vectors TYPE [ann_index_type]([ann_index_parameters]) [GRANULARITY [N]] + id Int64, + vectors Array(Float32), + INDEX index_name vectors TYPE vector_similarity(method, distance_function[, quantization, connectivity, expansion_add, expansion_search]) [GRANULARITY N] ) ENGINE = MergeTree ORDER BY id; ``` +Parameters: +- `method`: Supports currently only `hnsw`. +- `distance_function`: either `L2Distance` (the [Euclidean distance](https://en.wikipedia.org/wiki/Euclidean_distance) - the length of a + line between two points in Euclidean space), or `cosineDistance` (the [cosine + distance](https://en.wikipedia.org/wiki/Cosine_similarity#Cosine_distance)- the angle between two non-zero vectors). +- `quantization`: either `f32`, `f16`, or `i8` for storing the vector with reduced precision (optional, default: `f32`) +- `m`: the number of neighbors per graph node (optional, default: 16) +- `ef_construction`: (optional, default: 128) +- `ef_search`: (optional, default: 64) + +Value 0 for parameters `m`, `ef_construction`, and `ef_search` refers to the default value. + +Example: + +```sql +CREATE TABLE table +( + id Int64, + vectors Array(Float32), + INDEX idx vectors TYPE vector_similarity('hnsw', 'L2Distance') -- Alternative syntax: TYPE vector_similarity(hnsw, L2Distance) +) +ENGINE = MergeTree +ORDER BY id; +``` + +Vector similarity indexes are based on the [USearch library](https://github.com/unum-cloud/usearch), which implements the [HNSW +algorithm](https://arxiv.org/abs/1603.09320), i.e., a hierarchical graph where each point represents a vector and the edges represent +similarity. Such hierarchical structures can be very efficient on large collections. They may often fetch 0.05% or less data from the +overall dataset, while still providing 99% recall. This is especially useful when working with high-dimensional vectors, that are expensive +to load and compare. The library also has several hardware-specific SIMD optimizations to accelerate further distance computations on modern +Arm (NEON and SVE) and x86 (AVX2 and AVX-512) CPUs and OS-specific optimizations to allow efficient navigation around immutable persistent +files, without loading them into RAM. + +USearch indexes are currently experimental, to use them you first need to `SET allow_experimental_vector_similarity_index = 1`. + +Vector similarity indexes currently support two distance functions: +- `L2Distance`, also called Euclidean distance, is the length of a line segment between two points in Euclidean space + ([Wikipedia](https://en.wikipedia.org/wiki/Euclidean_distance)). +- `cosineDistance`, also called cosine similarity, is the cosine of the angle between two (non-zero) vectors + ([Wikipedia](https://en.wikipedia.org/wiki/Cosine_similarity)). + +Vector similarity indexes allows storing the vectors in reduced precision formats. Supported scalar kinds are `f64`, `f32`, `f16` or `i8`. +If no scalar kind was specified during index creation, `f16` is used as default. + +For normalized data, `L2Distance` is usually a better choice, otherwise `cosineDistance` is recommended to compensate for scale. If no +distance function was specified during index creation, `L2Distance` is used as default. + +:::note +All arrays must have same length. To avoid errors, you can use a +[CONSTRAINT](/docs/en/sql-reference/statements/create/table.md#constraints), for example, `CONSTRAINT constraint_name_1 CHECK +length(vectors) = 256`. Also, empty `Arrays` and unspecified `Array` values in INSERT statements (i.e. default values) are not supported. +::: + +:::note +The vector similarity index currently does not work with per-table, non-default `index_granularity` settings (see +[here](https://github.com/ClickHouse/ClickHouse/pull/51325#issuecomment-1605920475)). If necessary, the value must be changed in config.xml. +::: + ANN indexes are built during column insertion and merge. As a result, `INSERT` and `OPTIMIZE` statements will be slower than for ordinary tables. ANNIndexes are ideally used only with immutable or rarely changed data, respectively when are far more read requests than write requests. -ANN indexes support two types of queries: - -- ORDER BY queries: +ANN indexes support these queries: ``` sql SELECT * - FROM table_with_ann_index + FROM table [WHERE ...] ORDER BY Distance(vectors, Point) LIMIT N ``` -- WHERE queries: - - ``` sql - SELECT * - FROM table_with_ann_index - WHERE Distance(vectors, Point) < MaxDistance - LIMIT N - ``` - :::tip To avoid writing out large vectors, you can use [query parameters](/docs/en/interfaces/cli.md#queries-with-parameters-cli-queries-with-parameters), e.g. ```bash -clickhouse-client --param_vec='hello' --query="SELECT * FROM table_with_ann_index WHERE L2Distance(vectors, {vec: Array(Float32)}) < 1.0" +clickhouse-client --param_vec='hello' --query="SELECT * FROM table WHERE L2Distance(vectors, {vec: Array(Float32)}) < 1.0" ``` ::: -**Restrictions**: Queries that contain both a `WHERE Distance(vectors, Point) < MaxDistance` and an `ORDER BY Distance(vectors, Point)` -clause cannot use ANN indexes. Also, the approximate algorithms used to determine the nearest neighbors require a limit, hence queries -without `LIMIT` clause cannot utilize ANN indexes. Also, ANN indexes are only used if the query has a `LIMIT` value smaller than setting +**Restrictions**: Approximate algorithms used to determine the nearest neighbors require a limit, hence queries without `LIMIT` clause +cannot utilize ANN indexes. Also, ANN indexes are only used if the query has a `LIMIT` value smaller than setting `max_limit_for_ann_queries` (default: 1 million rows). This is a safeguard to prevent large memory allocations by external libraries for approximate neighbor search. @@ -122,128 +158,3 @@ brute-force distance calculation over all rows of the granules. With a small `GR equally good, only the processing performance differs. It is generally recommended to use a large `GRANULARITY` for ANN indexes and fall back to a smaller `GRANULARITY` values only in case of problems like excessive memory consumption of the ANN structures. If no `GRANULARITY` was specified for ANN indexes, the default value is 100 million. - - -# Available ANN Indexes {#available_ann_indexes} - -- [Annoy](/docs/en/engines/table-engines/mergetree-family/annindexes.md#annoy-annoy) - -- [USearch](/docs/en/engines/table-engines/mergetree-family/annindexes.md#usearch-usearch) - -## Annoy {#annoy} - -Annoy indexes are currently experimental, to use them you first need to `SET allow_experimental_annoy_index = 1`. They are also currently -disabled on ARM due to memory safety problems with the algorithm. - -This type of ANN index is based on the [Annoy library](https://github.com/spotify/annoy) which recursively divides the space into random -linear surfaces (lines in 2D, planes in 3D etc.). - -
- -
- -Syntax to create an Annoy index over an [Array(Float32)](../../../sql-reference/data-types/array.md) column: - -```sql -CREATE TABLE table_with_annoy_index -( - id Int64, - vectors Array(Float32), - INDEX [ann_index_name] vectors TYPE annoy([Distance[, NumTrees]]) [GRANULARITY N] -) -ENGINE = MergeTree -ORDER BY id; -``` - -Annoy currently supports two distance functions: -- `L2Distance`, also called Euclidean distance, is the length of a line segment between two points in Euclidean space - ([Wikipedia](https://en.wikipedia.org/wiki/Euclidean_distance)). -- `cosineDistance`, also called cosine similarity, is the cosine of the angle between two (non-zero) vectors - ([Wikipedia](https://en.wikipedia.org/wiki/Cosine_similarity)). - -For normalized data, `L2Distance` is usually a better choice, otherwise `cosineDistance` is recommended to compensate for scale. If no -distance function was specified during index creation, `L2Distance` is used as default. - -Parameter `NumTrees` is the number of trees which the algorithm creates (default if not specified: 100). Higher values of `NumTree` mean -more accurate search results but slower index creation / query times (approximately linearly) as well as larger index sizes. - -:::note -All arrays must have same length. To avoid errors, you can use a -[CONSTRAINT](/docs/en/sql-reference/statements/create/table.md#constraints), for example, `CONSTRAINT constraint_name_1 CHECK -length(vectors) = 256`. Also, empty `Arrays` and unspecified `Array` values in INSERT statements (i.e. default values) are not supported. -::: - -The creation of Annoy indexes (whenever a new part is build, e.g. at the end of a merge) is a relatively slow process. You can increase -setting `max_threads_for_annoy_index_creation` (default: 4) which controls how many threads are used to create an Annoy index. Please be -careful with this setting, it is possible that multiple indexes are created in parallel in which case there can be overparallelization. - -Setting `annoy_index_search_k_nodes` (default: `NumTrees * LIMIT`) determines how many tree nodes are inspected during SELECTs. Larger -values mean more accurate results at the cost of longer query runtime: - -```sql -SELECT * -FROM table_name -ORDER BY L2Distance(vectors, Point) -LIMIT N -SETTINGS annoy_index_search_k_nodes=100; -``` - -:::note -The Annoy index currently does not work with per-table, non-default `index_granularity` settings (see -[here](https://github.com/ClickHouse/ClickHouse/pull/51325#issuecomment-1605920475)). If necessary, the value must be changed in config.xml. -::: - -## USearch {#usearch} - -This type of ANN index is based on the [USearch library](https://github.com/unum-cloud/usearch), which implements the [HNSW -algorithm](https://arxiv.org/abs/1603.09320), i.e., builds a hierarchical graph where each point represents a vector and the edges represent -similarity. Such hierarchical structures can be very efficient on large collections. They may often fetch 0.05% or less data from the -overall dataset, while still providing 99% recall. This is especially useful when working with high-dimensional vectors, -that are expensive to load and compare. The library also has several hardware-specific SIMD optimizations to accelerate further -distance computations on modern Arm (NEON and SVE) and x86 (AVX2 and AVX-512) CPUs and OS-specific optimizations to allow efficient -navigation around immutable persistent files, without loading them into RAM. - -
- -
- -Syntax to create an USearch index over an [Array](../../../sql-reference/data-types/array.md) column: - -```sql -CREATE TABLE table_with_usearch_index -( - id Int64, - vectors Array(Float32), - INDEX [ann_index_name] vectors TYPE usearch([Distance[, ScalarKind]]) [GRANULARITY N] -) -ENGINE = MergeTree -ORDER BY id; -``` - -USearch currently supports two distance functions: -- `L2Distance`, also called Euclidean distance, is the length of a line segment between two points in Euclidean space - ([Wikipedia](https://en.wikipedia.org/wiki/Euclidean_distance)). -- `cosineDistance`, also called cosine similarity, is the cosine of the angle between two (non-zero) vectors - ([Wikipedia](https://en.wikipedia.org/wiki/Cosine_similarity)). - -USearch allows storing the vectors in reduced precision formats. Supported scalar kinds are `f64`, `f32`, `f16` or `i8`. If no scalar kind -was specified during index creation, `f16` is used as default. - -For normalized data, `L2Distance` is usually a better choice, otherwise `cosineDistance` is recommended to compensate for scale. If no -distance function was specified during index creation, `L2Distance` is used as default. diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 7ffbd9a5bae..0b693775dde 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -80,7 +80,7 @@ For partitioning by month, use the `toYYYYMM(date_column)` expression, where `da `PRIMARY KEY` — The primary key if it [differs from the sorting key](#choosing-a-primary-key-that-differs-from-the-sorting-key). Optional. Specifying a sorting key (using `ORDER BY` clause) implicitly specifies a primary key. -It is usually not necessary to specify the primary key in addition to the primary key. +It is usually not necessary to specify the primary key in addition to the sorting key. #### SAMPLE BY @@ -1005,7 +1005,7 @@ They can be used for prewhere optimization only if we enable `set allow_statisti ## Column-level Settings {#column-level-settings} -Certain MergeTree settings can be override at column level: +Certain MergeTree settings can be overridden at column level: - `max_compress_block_size` — Maximum size of blocks of uncompressed data before compressing for writing to a table. - `min_compress_block_size` — Minimum size of blocks of uncompressed data required for compression when writing the next mark. diff --git a/docs/en/interfaces/prometheus.md b/docs/en/interfaces/prometheus.md new file mode 100644 index 00000000000..8e7023cc51f --- /dev/null +++ b/docs/en/interfaces/prometheus.md @@ -0,0 +1,160 @@ +--- +slug: /en/interfaces/prometheus +sidebar_position: 19 +sidebar_label: Prometheus protocols +--- + +# Prometheus protocols + +## Exposing metrics {#expose} + +:::note +ClickHouse Cloud does not currently support connecting to Prometheus. To be notified when this feature is supported, please contact support@clickhouse.com. +::: + +ClickHouse can expose its own metrics for scraping from Prometheus: + +```xml + + 9363 + /metrics + true + true + true + true + + +Section `` can be used to make more extended handlers. +This section is similar to [](/en/interfaces/http) but works for prometheus protocols: + +```xml + + 9363 + + + /metrics + + expose_metrics + true + true + true + true + + + + +``` + +Settings: + +| Name | Default | Description | +|---|---|---|---| +| `port` | none | Port for serving the exposing metrics protocol. | +| `endpoint` | `/metrics` | HTTP endpoint for scraping metrics by prometheus server. Starts with `/`. Should not be used with the `` section. | +| `url` / `headers` / `method` | none | Filters used to find a matching handler for a request. Similar to the fields with the same names in the [](/en/interfaces/http) section. | +| `metrics` | true | Expose metrics from the [system.metrics](/en/operations/system-tables/metrics) table. | +| `asynchronous_metrics` | true | Expose current metrics values from the [system.asynchronous_metrics](/en/operations/system-tables/asynchronous_metrics) table. | +| `events` | true | Expose metrics from the [system.events](/en/operations/system-tables/events) table. | +| `errors` | true | Expose the number of errors by error codes occurred since the last server restart. This information could be obtained from the [system.errors](/en/operations/system-tables/errors) as well. | + +Check (replace `127.0.0.1` with the IP addr or hostname of your ClickHouse server): +```bash +curl 127.0.0.1:9363/metrics +``` + +## Remote-write protocol {#remote-write} + +ClickHouse supports the [remote-write](https://prometheus.io/docs/specs/remote_write_spec/) protocol. +Data are received by this protocol and written to a [TimeSeries](/en/engines/table-engines/special/time_series) table +(which should be created beforehand). + +```xml + + 9363 + + + /write + + remote_write + db_name + time_series_table
+
+
+
+
+``` + +Settings: + +| Name | Default | Description | +|---|---|---|---| +| `port` | none | Port for serving the `remote-write` protocol. | +| `url` / `headers` / `method` | none | Filters used to find a matching handler for a request. Similar to the fields with the same names in the [](/en/interfaces/http) section. | +| `table` | none | The name of a [TimeSeries](/en/engines/table-engines/special/time_series) table to write data received by the `remote-write` protocol. This name can optionally contain the name of a database too. | +| `database` | none | The name of a database where the table specified in the `table` setting is located if it's not specified in the `table` setting. | + +## Remote-read protocol {#remote-read} + +ClickHouse supports the [remote-read](https://prometheus.io/docs/prometheus/latest/querying/remote_read_api/) protocol. +Data are read from a [TimeSeries](/en/engines/table-engines/special/time_series) table and sent via this protocol. + +```xml + + 9363 + + + /read + + remote_read + db_name + time_series_table
+
+
+
+
+``` + +Settings: + +| Name | Default | Description | +|---|---|---|---| +| `port` | none | Port for serving the `remote-read` protocol. | +| `url` / `headers` / `method` | none | Filters used to find a matching handler for a request. Similar to the fields with the same names in the [](/en/interfaces/http) section. | +| `table` | none | The name of a [TimeSeries](/en/engines/table-engines/special/time_series) table to read data to send by the `remote-read` protocol. This name can optionally contain the name of a database too. | +| `database` | none | The name of a database where the table specified in the `table` setting is located if it's not specified in the `table` setting. | + +## Configuration for multiple protocols {#multiple-protocols} + +Multiple protocols can be specified together in one place: + +```xml + + 9363 + + + /metrics + + expose_metrics + true + true + true + true + + + + /write + + remote_write + db_name.time_series_table
+
+
+ + /read + + remote_read + db_name.time_series_table
+
+
+
+
+``` diff --git a/docs/en/interfaces/schema-inference.md b/docs/en/interfaces/schema-inference.md index 05fae994cbe..4afba20d76c 100644 --- a/docs/en/interfaces/schema-inference.md +++ b/docs/en/interfaces/schema-inference.md @@ -359,13 +359,14 @@ DESC format(JSONEachRow, '{"int" : 42, "float" : 42.42, "string" : "Hello, World Dates, DateTimes: ```sql -DESC format(JSONEachRow, '{"date" : "2022-01-01", "datetime" : "2022-01-01 00:00:00"}') +DESC format(JSONEachRow, '{"date" : "2022-01-01", "datetime" : "2022-01-01 00:00:00", "datetime64" : "2022-01-01 00:00:00.000"}') ``` ```response -┌─name─────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ -│ date │ Nullable(Date) │ │ │ │ │ │ -│ datetime │ Nullable(DateTime64(9)) │ │ │ │ │ │ -└──────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +┌─name───────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ date │ Nullable(Date) │ │ │ │ │ │ +│ datetime │ Nullable(DateTime) │ │ │ │ │ │ +│ datetime64 │ Nullable(DateTime64(9)) │ │ │ │ │ │ +└────────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` Arrays: @@ -759,12 +760,13 @@ DESC format(CSV, 'Hello world!,World hello!') Dates, DateTimes: ```sql -DESC format(CSV, '"2020-01-01","2020-01-01 00:00:00"') +DESC format(CSV, '"2020-01-01","2020-01-01 00:00:00","2022-01-01 00:00:00.000"') ``` ```response ┌─name─┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ │ c1 │ Nullable(Date) │ │ │ │ │ │ -│ c2 │ Nullable(DateTime64(9)) │ │ │ │ │ │ +│ c2 │ Nullable(DateTime) │ │ │ │ │ │ +│ c3 │ Nullable(DateTime64(9)) │ │ │ │ │ │ └──────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` @@ -956,12 +958,13 @@ DESC format(TSKV, 'int=42 float=42.42 bool=true string=Hello,World!\n') Dates, DateTimes: ```sql -DESC format(TSV, '2020-01-01 2020-01-01 00:00:00') +DESC format(TSV, '2020-01-01 2020-01-01 00:00:00 2022-01-01 00:00:00.000') ``` ```response ┌─name─┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ │ c1 │ Nullable(Date) │ │ │ │ │ │ -│ c2 │ Nullable(DateTime64(9)) │ │ │ │ │ │ +│ c2 │ Nullable(DateTime) │ │ │ │ │ │ +│ c3 │ Nullable(DateTime64(9)) │ │ │ │ │ │ └──────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` @@ -1126,12 +1129,13 @@ DESC format(Values, $$(42, 42.42, true, 'Hello,World!')$$) Dates, DateTimes: ```sql -DESC format(Values, $$('2020-01-01', '2020-01-01 00:00:00')$$) -``` + DESC format(Values, $$('2020-01-01', '2020-01-01 00:00:00', '2022-01-01 00:00:00.000')$$) + ``` ```response ┌─name─┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ │ c1 │ Nullable(Date) │ │ │ │ │ │ -│ c2 │ Nullable(DateTime64(9)) │ │ │ │ │ │ +│ c2 │ Nullable(DateTime) │ │ │ │ │ │ +│ c3 │ Nullable(DateTime64(9)) │ │ │ │ │ │ └──────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` @@ -1504,8 +1508,8 @@ DESC format(JSONEachRow, $$ #### input_format_try_infer_datetimes -If enabled, ClickHouse will try to infer type `DateTime64` from string fields in schema inference for text formats. -If all fields from a column in sample data were successfully parsed as datetimes, the result type will be `DateTime64(9)`, +If enabled, ClickHouse will try to infer type `DateTime` or `DateTime64` from string fields in schema inference for text formats. +If all fields from a column in sample data were successfully parsed as datetimes, the result type will be `DateTime` or `DateTime64(9)` (if any datetime had fractional part), if at least one field was not parsed as datetime, the result type will be `String`. Enabled by default. @@ -1513,39 +1517,66 @@ Enabled by default. **Examples** ```sql -SET input_format_try_infer_datetimes = 0 +SET input_format_try_infer_datetimes = 0; DESC format(JSONEachRow, $$ - {"datetime" : "2021-01-01 00:00:00.000"} - {"datetime" : "2022-01-01 00:00:00.000"} + {"datetime" : "2021-01-01 00:00:00", "datetime64" : "2021-01-01 00:00:00.000"} + {"datetime" : "2022-01-01 00:00:00", "datetime64" : "2022-01-01 00:00:00.000"} $$) ``` ```response -┌─name─────┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ -│ datetime │ Nullable(String) │ │ │ │ │ │ -└──────────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +┌─name───────┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ datetime │ Nullable(String) │ │ │ │ │ │ +│ datetime64 │ Nullable(String) │ │ │ │ │ │ +└────────────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` ```sql -SET input_format_try_infer_datetimes = 1 +SET input_format_try_infer_datetimes = 1; DESC format(JSONEachRow, $$ - {"datetime" : "2021-01-01 00:00:00.000"} - {"datetime" : "2022-01-01 00:00:00.000"} + {"datetime" : "2021-01-01 00:00:00", "datetime64" : "2021-01-01 00:00:00.000"} + {"datetime" : "2022-01-01 00:00:00", "datetime64" : "2022-01-01 00:00:00.000"} $$) ``` ```response -┌─name─────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ -│ datetime │ Nullable(DateTime64(9)) │ │ │ │ │ │ -└──────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +┌─name───────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ datetime │ Nullable(DateTime) │ │ │ │ │ │ +│ datetime64 │ Nullable(DateTime64(9)) │ │ │ │ │ │ +└────────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` ```sql DESC format(JSONEachRow, $$ - {"datetime" : "2021-01-01 00:00:00.000"} - {"datetime" : "unknown"} + {"datetime" : "2021-01-01 00:00:00", "datetime64" : "2021-01-01 00:00:00.000"} + {"datetime" : "unknown", "datetime64" : "unknown"} $$) ``` ```response -┌─name─────┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ -│ datetime │ Nullable(String) │ │ │ │ │ │ -└──────────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +┌─name───────┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ datetime │ Nullable(String) │ │ │ │ │ │ +│ datetime64 │ Nullable(String) │ │ │ │ │ │ +└────────────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +#### input_format_try_infer_datetimes_only_datetime64 + +If enabled, ClickHouse will always infer `DateTime64(9)` when `input_format_try_infer_datetimes` is enabled even if datetime values don't contain fractional part. + +Disabled by default. + +**Examples** + +```sql +SET input_format_try_infer_datetimes = 1; +SET input_format_try_infer_datetimes_only_datetime64 = 1; +DESC format(JSONEachRow, $$ + {"datetime" : "2021-01-01 00:00:00", "datetime64" : "2021-01-01 00:00:00.000"} + {"datetime" : "2022-01-01 00:00:00", "datetime64" : "2022-01-01 00:00:00.000"} + $$) +``` + +```text +┌─name───────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ datetime │ Nullable(DateTime64(9)) │ │ │ │ │ │ +│ datetime64 │ Nullable(DateTime64(9)) │ │ │ │ │ │ +└────────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` Note: Parsing datetimes during schema inference respect setting [date_time_input_format](/docs/en/operations/settings/settings-formats.md#date_time_input_format) diff --git a/docs/en/interfaces/third-party/gui.md b/docs/en/interfaces/third-party/gui.md index 5b7615485ca..8d9dce983bc 100644 --- a/docs/en/interfaces/third-party/gui.md +++ b/docs/en/interfaces/third-party/gui.md @@ -10,7 +10,7 @@ sidebar_label: Visual Interfaces ### ch-ui {#ch-ui} -[ch-ui](https://github.com/caioricciuti/ch-ui) is a simple React.js app interface for ClickHouse databases, designed for executing queries and visualizing data. Built with React and the ClickHouse client for web, it offers a sleek and user-friendly UI for easy database interactions. +[ch-ui](https://github.com/caioricciuti/ch-ui) is a simple React.js app interface for ClickHouse databases designed for executing queries and visualizing data. Built with React and the ClickHouse client for web, it offers a sleek and user-friendly UI for easy database interactions. Features: @@ -25,7 +25,7 @@ Web interface for ClickHouse in the [Tabix](https://github.com/tabixio/tabix) pr Features: -- Works with ClickHouse directly from the browser, without the need to install additional software. +- Works with ClickHouse directly from the browser without the need to install additional software. - Query editor with syntax highlighting. - Auto-completion of commands. - Tools for graphical analysis of query execution. @@ -63,7 +63,7 @@ Features: - Table list with filtering and metadata. - Table preview with filtering and sorting. -- Read-only queries execution. +- Read-only query execution. ### Redash {#redash} @@ -75,23 +75,23 @@ Features: - Powerful editor of queries. - Database explorer. -- Visualization tools, that allow you to represent data in different forms. +- Visualization tool that allows you to represent data in different forms. ### Grafana {#grafana} [Grafana](https://grafana.com/grafana/plugins/grafana-clickhouse-datasource/) is a platform for monitoring and visualization. -"Grafana allows you to query, visualize, alert on and understand your metrics no matter where they are stored. Create, explore, and share dashboards with your team and foster a data driven culture. Trusted and loved by the community" — grafana.com. +"Grafana allows you to query, visualize, alert on and understand your metrics no matter where they are stored. Create, explore, and share dashboards with your team and foster a data-driven culture. Trusted and loved by the community" — grafana.com. -ClickHouse datasource plugin provides a support for ClickHouse as a backend database. +ClickHouse data source plugin provides support for ClickHouse as a backend database. -### qryn (#qryn) +### qryn {#qryn} [qryn](https://metrico.in) is a polyglot, high-performance observability stack for ClickHouse _(formerly cLoki)_ with native Grafana integrations allowing users to ingest and analyze logs, metrics and telemetry traces from any agent supporting Loki/LogQL, Prometheus/PromQL, OTLP/Tempo, Elastic, InfluxDB and many more. Features: -- Built in Explore UI and LogQL CLI for querying, extracting and visualizing data +- Built-in Explore UI and LogQL CLI for querying, extracting and visualizing data - Native Grafana APIs support for querying, processing, ingesting, tracing and alerting without plugins - Powerful pipeline to dynamically search, filter and extract data from logs, events, traces and beyond - Ingestion and PUSH APIs transparently compatible with LogQL, PromQL, InfluxDB, Elastic and many more @@ -139,7 +139,7 @@ Features: ### DBM {#dbm} -[DBM](https://dbm.incubator.edurt.io/) DBM is a visual management tool for ClickHouse! +[DBM](https://github.com/devlive-community/dbm) DBM is a visual management tool for ClickHouse! Features: @@ -151,7 +151,7 @@ Features: - Support custom query - Support multiple data sources management(connection test, monitoring) - Support monitor (processor, connection, query) -- Support migrate data +- Support migrating data ### Bytebase {#bytebase} @@ -169,7 +169,7 @@ Features: ### Zeppelin-Interpreter-for-ClickHouse {#zeppelin-interpreter-for-clickhouse} -[Zeppelin-Interpreter-for-ClickHouse](https://github.com/SiderZhang/Zeppelin-Interpreter-for-ClickHouse) is a [Zeppelin](https://zeppelin.apache.org) interpreter for ClickHouse. Compared with JDBC interpreter, it can provide better timeout control for long running queries. +[Zeppelin-Interpreter-for-ClickHouse](https://github.com/SiderZhang/Zeppelin-Interpreter-for-ClickHouse) is a [Zeppelin](https://zeppelin.apache.org) interpreter for ClickHouse. Compared with the JDBC interpreter, it can provide better timeout control for long-running queries. ### ClickCat {#clickcat} @@ -179,7 +179,7 @@ Features: - An online SQL editor which can run your SQL code without any installing. - You can observe all processes and mutations. For those unfinished processes, you can kill them in ui. -- The Metrics contains Cluster Analysis,Data Analysis,Query Analysis. +- The Metrics contain Cluster Analysis, Data Analysis, and Query Analysis. ### ClickVisual {#clickvisual} @@ -332,7 +332,7 @@ Learn more about the product at [TABLUM.IO](https://tablum.io/) ### CKMAN {#ckman} -[CKMAN] (https://www.github.com/housepower/ckman) is a tool for managing and monitoring ClickHouse clusters! +[CKMAN](https://www.github.com/housepower/ckman) is a tool for managing and monitoring ClickHouse clusters! Features: diff --git a/docs/en/operations/named-collections.md b/docs/en/operations/named-collections.md index 59ee05d1f9e..1c82aeaaf2c 100644 --- a/docs/en/operations/named-collections.md +++ b/docs/en/operations/named-collections.md @@ -73,13 +73,21 @@ In the above example the `password_sha256_hex` value is the hexadecimal represen ### Storage for named collections -Named collections can either be stored on local disk or in zookeeper/keeper. By default local storage is used. +Named collections can either be stored on local disk or in ZooKeeper/Keeper. By default local storage is used. +They can also be stored using encryption with the same algorithms used for [disk encryption](storing-data#encrypted-virtual-file-system), +where `aes_128_ctr` is used by default. -To configure named collections storage in keeper and a `type` (equal to either `keeper` or `zookeeper`) and `path` (path in keeper, where named collections will be stored) to `named_collections_storage` section in configuration file: +To configure named collections storage you need to specify a `type`. This can be either `local` or `keeper`/`zookeeper`. For encrypted storage, +you can use `local_encrypted` or `keeper_encrypted`/`zookeeper_encrypted`. + +To use ZooKeeper/Keeper we also need to set up a `path` (path in ZooKeeper/Keeper, where named collections will be stored) to +`named_collections_storage` section in configuration file. The following example uses encryption and ZooKeeper/Keeper: ``` - zookeeper + zookeeper_encrypted + bebec0cabebec0cabebec0cabebec0ca + aes_128_ctr /named_collections_path/ 1000 @@ -307,8 +315,22 @@ SELECT dictGet('dict', 'B', 2); ## Named collections for accessing PostgreSQL database -The description of parameters see [postgresql](../sql-reference/table-functions/postgresql.md). +The description of parameters see [postgresql](../sql-reference/table-functions/postgresql.md). Additionally, there are aliases: +- `username` for `user` +- `db` for `database`. + +Parameter `addresses_expr` is used in a collection instead of `host:port`. The parameter is optional, because there are other optional ones: `host`, `hostname`, `port`. The following pseudo code explains the priority: + +```sql +CASE + WHEN collection['addresses_expr'] != '' THEN collection['addresses_expr'] + WHEN collection['host'] != '' THEN collection['host'] || ':' || if(collection['port'] != '', collection['port'], '5432') + WHEN collection['hostname'] != '' THEN collection['hostname'] || ':' || if(collection['port'] != '', collection['port'], '5432') +END +``` + +Example of creation: ```sql CREATE NAMED COLLECTION mypg AS user = 'pguser', @@ -316,7 +338,7 @@ password = 'jw8s0F4', host = '127.0.0.1', port = 5432, database = 'test', -schema = 'test_schema', +schema = 'test_schema' ``` Example of configuration: @@ -369,6 +391,10 @@ SELECT * FROM mypgtable; └───┘ ``` +:::note +PostgreSQL copies data from the named collection when the table is being created. A change in the collection does not affect the existing tables. +::: + ### Example of using named collections with database with engine PostgreSQL ```sql @@ -478,7 +504,7 @@ kafka_topic_list = 'kafka_topic', kafka_group_name = 'consumer_group', kafka_format = 'JSONEachRow', kafka_max_block_size = '1048576'; - + ``` ### XML example diff --git a/docs/en/operations/query-cache.md b/docs/en/operations/query-cache.md index 7a920671fc2..955cec0234e 100644 --- a/docs/en/operations/query-cache.md +++ b/docs/en/operations/query-cache.md @@ -143,6 +143,20 @@ value can be specified at session, profile or query level using setting [query_c Entries in the query cache are compressed by default. This reduces the overall memory consumption at the cost of slower writes into / reads from the query cache. To disable compression, use setting [query_cache_compress_entries](settings/settings.md#query-cache-compress-entries). +Sometimes it is useful to keep multiple results for the same query cached. This can be achieved using setting +[query_cache_tag](settings/settings.md#query-cache-tag) that acts as as a label (or namespace) for a query cache entries. The query cache +considers results of the same query with different tags different. + +Example for creating three different query cache entries for the same query: + +```sql +SELECT 1 SETTINGS use_query_cache = true; -- query_cache_tag is implicitly '' (empty string) +SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'tag 1'; +SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'tag 2'; +``` + +To remove only entries with tag `tag` from the query cache, you can use statement `SYSTEM DROP QUERY CACHE TAG 'tag'`. + ClickHouse reads table data in blocks of [max_block_size](settings/settings.md#setting-max_block_size) rows. Due to filtering, aggregation, etc., result blocks are typically much smaller than 'max_block_size' but there are also cases where they are much bigger. Setting [query_cache_squash_partial_results](settings/settings.md#query-cache-squash-partial-results) (enabled by default) controls if result blocks diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index a1e3c292b04..9fce83a0dc4 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -1400,6 +1400,16 @@ The number of seconds that ClickHouse waits for incoming requests before closing 10 ``` +## max_keep_alive_requests {#max-keep-alive-requests} + +Maximal number of requests through a single keep-alive connection until it will be closed by ClickHouse server. Default to 10000. + +**Example** + +``` xml +10 +``` + ## listen_host {#listen_host} Restriction on hosts that requests can come from. If you want the server to answer all of them, specify `::`. @@ -2112,48 +2122,6 @@ The trailing slash is mandatory. /var/lib/clickhouse/ ``` -## Prometheus {#prometheus} - -:::note -ClickHouse Cloud does not currently support connecting to Prometheus. To be notified when this feature is supported, please contact support@clickhouse.com. -::: - -Exposing metrics data for scraping from [Prometheus](https://prometheus.io). - -Settings: - -- `endpoint` – HTTP endpoint for scraping metrics by prometheus server. Start from ‘/’. -- `port` – Port for `endpoint`. -- `metrics` – Expose metrics from the [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) table. -- `events` – Expose metrics from the [system.events](../../operations/system-tables/events.md#system_tables-events) table. -- `asynchronous_metrics` – Expose current metrics values from the [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) table. -- `errors` - Expose the number of errors by error codes occurred since the last server restart. This information could be obtained from the [system.errors](../../operations/system-tables/asynchronous_metrics.md#system_tables-errors) as well. - -**Example** - -``` xml - - 0.0.0.0 - 8123 - 9000 - - - /metrics - 9363 - true - true - true - true - - - -``` - -Check (replace `127.0.0.1` with the IP addr or hostname of your ClickHouse server): -```bash -curl 127.0.0.1:9363/metrics -``` - ## query_log {#query-log} Setting for logging queries received with the [log_queries=1](../../operations/settings/settings.md) setting. diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index 67fa45c20cd..a13aacc76e6 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -1041,3 +1041,27 @@ Compression rates of LZ4 or ZSTD improve on average by 20-40%. This setting works best for tables with no primary key or a low-cardinality primary key, i.e. a table with only few distinct primary key values. High-cardinality primary keys, e.g. involving timestamp columns of type `DateTime64`, are not expected to benefit from this setting. + +## lightweight_mutation_projection_mode + +By default, lightweight delete `DELETE` does not work for tables with projections. This is because rows in a projection may be affected by a `DELETE` operation. So the default value would be `throw`. +However, this option can change the behavior. With the value either `drop` or `rebuild`, deletes will work with projections. `drop` would delete the projection so it might be fast in the current query as projection gets deleted but slow in future queries as no projection attached. +`rebuild` would rebuild the projection which might affect the performance of the current query, but might speedup for future queries. A good thing is that these options would only work in the part level, +which means projections in the part that don't get touched would stay intact instead of triggering any action like drop or rebuild. + +Possible values: + +- throw, drop, rebuild + +Default value: throw + +## deduplicate_merge_projection_mode + +Whether to allow create projection for the table with non-classic MergeTree, that is not (Replicated, Shared) MergeTree. If allowed, what is the action when merge projections, either drop or rebuild. So classic MergeTree would ignore this setting. +It also controls `OPTIMIZE DEDUPLICATE` as well, but has effect on all MergeTree family members. Similar to the option `lightweight_mutation_projection_mode`, it is also part level. + +Possible values: + +- throw, drop, rebuild + +Default value: throw \ No newline at end of file diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index f8b40cd81ac..fcec0afb8d2 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -194,6 +194,17 @@ If enabled, ClickHouse will try to infer type `DateTime64` from string fields in Enabled by default. +## input_format_try_infer_variants {#input_format_try_infer_variants} + +If enabled, ClickHouse will try to infer type [`Variant`](../../sql-reference/data-types/variant.md) in schema inference for text formats when there is more than one possible type for column/array elements. + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. + +Default value: `0`. + ## date_time_input_format {#date_time_input_format} Allows choosing a parser of the text representation of date and time. diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 968481062e9..7bd36ccd00f 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1381,7 +1381,7 @@ Default value: `2`. Close connection before returning connection to the pool. -Default value: true. +Default value: false. ## odbc_bridge_connection_pool_size {#odbc-bridge-connection-pool-size} @@ -1800,6 +1800,17 @@ Possible values: Default value: `0`. +## query_cache_tag {#query-cache-tag} + +A string which acts as a label for [query cache](../query-cache.md) entries. +The same queries with different tags are considered different by the query cache. + +Possible values: + +- Any string + +Default value: `''` + ## query_cache_max_size_in_bytes {#query-cache-max-size-in-bytes} The maximum amount of memory (in bytes) the current user may allocate in the [query cache](../query-cache.md). 0 means unlimited. @@ -2844,7 +2855,7 @@ The minimum chunk size in bytes, which each thread will parse in parallel. ## merge_selecting_sleep_ms {#merge_selecting_sleep_ms} -Sleep time for merge selecting when no part is selected. A lower setting triggers selecting tasks in `background_schedule_pool` frequently, which results in a large number of requests to ClickHouse Keeper in large-scale clusters. +Minimum time to wait before trying to select parts to merge again after no parts were selected. A lower setting triggers selecting tasks in `background_schedule_pool` frequently, which results in a large number of requests to ClickHouse Keeper in large-scale clusters. Possible values: @@ -2852,6 +2863,16 @@ Possible values: Default value: `5000`. +## max_merge_selecting_sleep_ms + +Maximum time to wait before trying to select parts to merge again after no parts were selected. A lower setting triggers selecting tasks in `background_schedule_pool` frequently, which results in a large number of requests to ClickHouse Keeper in large-scale clusters. + +Possible values: + +- Any positive integer. + +Default value: `60000`. + ## parallel_distributed_insert_select {#parallel_distributed_insert_select} Enables parallel distributed `INSERT ... SELECT` query. @@ -5609,6 +5630,19 @@ Minimal size of block to compress in CROSS JOIN. Zero value means - disable this Default value: `1GiB`. +## use_json_alias_for_old_object_type + +When enabled, `JSON` data type alias will be used to create an old [Object('json')](../../sql-reference/data-types/json.md) type instead of the new [JSON](../../sql-reference/data-types/newjson.md) type. +This setting requires server restart to take effect when changed. + +Default value: `false`. + +## type_json_skip_duplicated_paths + +When enabled, ClickHouse will skip duplicated paths during parsing of [JSON](../../sql-reference/data-types/newjson.md) object. Only the value of the first occurrence of each path will be inserted. + +Default value: `false` + ## restore_replace_external_engines_to_null For testing purposes. Replaces all external engines to Null to not initiate external connections. @@ -5626,3 +5660,26 @@ Default value: `False` Disable all insert and mutations (alter table update / alter table delete / alter table drop partition). Set to true, can make this node focus on reading queries. Default value: `false`. + +## use_hive_partitioning + +When enabled, ClickHouse will detect Hive-style partitioning in path (`/name=value/`) in file-like table engines [File](../../engines/table-engines/special/file.md#hive-style-partitioning)/[S3](../../engines/table-engines/integrations/s3.md#hive-style-partitioning)/[URL](../../engines/table-engines/special/url.md#hive-style-partitioning)/[HDFS](../../engines/table-engines/integrations/hdfs.md#hive-style-partitioning)/[AzureBlobStorage](../../engines/table-engines/integrations/azureBlobStorage.md#hive-style-partitioning) and will allow to use partition columns as virtual columns in the query. These virtual columns will have the same names as in the partitioned path, but starting with `_`. + +Default value: `false`. + +## allow_experimental_time_series_table {#allow-experimental-time-series-table} + +Allows creation of tables with the [TimeSeries](../../engines/table-engines/integrations/time-series.md) table engine. + +Possible values: + +- 0 — the [TimeSeries](../../engines/table-engines/integrations/time-series.md) table engine is disabled. +- 1 — the [TimeSeries](../../engines/table-engines/integrations/time-series.md) table engine is enabled. + +Default value: `0`. + +## create_if_not_exists + +Enable `IF NOT EXISTS` for `CREATE` statement by default. If either this setting or `IF NOT EXISTS` is specified and a table with the provided name already exists, no exception will be thrown. + +Default value: `false`. diff --git a/docs/en/operations/system-tables/kafka_consumers.md b/docs/en/operations/system-tables/kafka_consumers.md index 7e28a251e26..d58c9f754fd 100644 --- a/docs/en/operations/system-tables/kafka_consumers.md +++ b/docs/en/operations/system-tables/kafka_consumers.md @@ -24,6 +24,7 @@ Columns: - `num_rebalance_revocations`, (UInt64) - number of times the consumer was revoked its partitions - `num_rebalance_assignments`, (UInt64) - number of times the consumer was assigned to Kafka cluster - `is_currently_used`, (UInt8) - consumer is in use +- `last_used`, (UInt64) - last time this consumer was in use, unix time in microseconds - `rdkafka_stat` (String) - library internal statistic. See https://github.com/ClickHouse/librdkafka/blob/master/STATISTICS.md . Set `statistics_interval_ms` to 0 disable, default is 3000 (once in three seconds). Example: diff --git a/docs/en/operations/system-tables/query_cache.md b/docs/en/operations/system-tables/query_cache.md index a9f86f5fc2b..9c48574a329 100644 --- a/docs/en/operations/system-tables/query_cache.md +++ b/docs/en/operations/system-tables/query_cache.md @@ -9,6 +9,7 @@ Columns: - `query` ([String](../../sql-reference/data-types/string.md)) — Query string. - `result_size` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Size of the query cache entry. +- `tag` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) — Tag of the query cache entry. - `stale` ([UInt8](../../sql-reference/data-types/int-uint.md)) — If the query cache entry is stale. - `shared` ([UInt8](../../sql-reference/data-types/int-uint.md)) — If the query cache entry is shared between multiple users. - `compressed` ([UInt8](../../sql-reference/data-types/int-uint.md)) — If the query cache entry is compressed. @@ -26,6 +27,7 @@ Row 1: ────── query: SELECT 1 SETTINGS use_query_cache = 1 result_size: 128 +tag: stale: 0 shared: 0 compressed: 1 diff --git a/docs/en/operations/system-tables/trace_log.md b/docs/en/operations/system-tables/trace_log.md index 5adc33de37f..a60de2a08d1 100644 --- a/docs/en/operations/system-tables/trace_log.md +++ b/docs/en/operations/system-tables/trace_log.md @@ -3,7 +3,7 @@ slug: /en/operations/system-tables/trace_log --- # trace_log -Contains stack traces collected by the sampling query profiler. +Contains stack traces collected by the [sampling query profiler](../../operations/optimizing-performance/sampling-query-profiler.md). ClickHouse creates this table when the [trace_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-trace_log) server configuration section is set. Also see settings: [query_profiler_real_time_period_ns](../../operations/settings/settings.md#query_profiler_real_time_period_ns), [query_profiler_cpu_time_period_ns](../../operations/settings/settings.md#query_profiler_cpu_time_period_ns), [memory_profiler_step](../../operations/settings/settings.md#memory_profiler_step), [memory_profiler_sample_probability](../../operations/settings/settings.md#memory_profiler_sample_probability), [trace_profile_events](../../operations/settings/settings.md#trace_profile_events). diff --git a/docs/en/operations/system-tables/view_refreshes.md b/docs/en/operations/system-tables/view_refreshes.md index 12377507b39..e792e0d095d 100644 --- a/docs/en/operations/system-tables/view_refreshes.md +++ b/docs/en/operations/system-tables/view_refreshes.md @@ -17,7 +17,8 @@ Columns: - `duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — How long the last refresh attempt took. - `next_refresh_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Time at which the next refresh is scheduled to start. - `remaining_dependencies` ([Array(String)](../../sql-reference/data-types/array.md)) — If the view has [refresh dependencies](../../sql-reference/statements/create/view.md#refresh-dependencies), this array contains the subset of those dependencies that are not satisfied for the current refresh yet. If `status = 'WaitingForDependencies'`, a refresh is ready to start as soon as these dependencies are fulfilled. -- `exception` ([String](../../sql-reference/data-types/string.md)) — if `last_refresh_result = 'Exception'`, i.e. the last refresh attempt failed, this column contains the corresponding error message and stack trace. +- `exception` ([String](../../sql-reference/data-types/string.md)) — if `last_refresh_result = 'Error'`, i.e. the last refresh attempt failed, this column contains the corresponding error message and stack trace. +- `retry` ([UInt64](../../sql-reference/data-types/int-uint.md)) — If nonzero, the current or next refresh is a retry (see `refresh_retries` refresh setting), and `retry` is the 1-based index of that retry. - `refresh_count` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of successful refreshes since last server restart or table creation. - `progress` ([Float64](../../sql-reference/data-types/float.md)) — Progress of the current refresh, between 0 and 1. - `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of rows read by the current refresh so far. diff --git a/docs/en/sql-reference/data-types/data-types-binary-encoding.md b/docs/en/sql-reference/data-types/data-types-binary-encoding.md index 812e946e43e..08fb664126a 100644 --- a/docs/en/sql-reference/data-types/data-types-binary-encoding.md +++ b/docs/en/sql-reference/data-types/data-types-binary-encoding.md @@ -12,57 +12,59 @@ This specification describes the binary format that can be used for binary encod The table below describes how each data type is represented in binary format. Each data type encoding consist of 1 byte that indicates the type and some optional additional information. `var_uint` in the binary encoding means that the size is encoded using Variable-Length Quantity compression. -| ClickHouse data type | Binary encoding | -|--------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `Nothing` | `0x00` | -| `UInt8` | `0x01` | -| `UInt16` | `0x02` | -| `UInt32` | `0x03` | -| `UInt64` | `0x04` | -| `UInt128` | `0x05` | -| `UInt256` | `0x06` | -| `Int8` | `0x07` | -| `Int16` | `0x08` | -| `Int32` | `0x09` | -| `Int64` | `0x0A` | -| `Int128` | `0x0B` | -| `Int256` | `0x0C` | -| `Float32` | `0x0D` | -| `Float64` | `0x0E` | -| `Date` | `0x0F` | -| `Date32` | `0x10` | -| `DateTime` | `0x11` | -| `DateTime(time_zone)` | `0x12` | -| `DateTime64(P)` | `0x13` | -| `DateTime64(P, time_zone)` | `0x14` | -| `String` | `0x15` | -| `FixedString(N)` | `0x16` | -| `Enum8` | `0x17...` | -| `Enum16` | `0x18...>` | -| `Decimal32(P, S)` | `0x19` | -| `Decimal64(P, S)` | `0x1A` | -| `Decimal128(P, S)` | `0x1B` | -| `Decimal256(P, S)` | `0x1C` | -| `UUID` | `0x1D` | -| `Array(T)` | `0x1E` | -| `Tuple(T1, ..., TN)` | `0x1F...` | -| `Tuple(name1 T1, ..., nameN TN)` | `0x20...` | -| `Set` | `0x21` | -| `Interval` | `0x22` (see [interval kind binary encoding](#interval-kind-binary-encoding)) | -| `Nullable(T)` | `0x23` | -| `Function` | `0x24...` | -| `AggregateFunction(function_name(param_1, ..., param_N), arg_T1, ..., arg_TN)` | `0x25......` (see [aggregate function parameter binary encoding](#aggregate-function-parameter-binary-encoding)) | -| `LowCardinality(T)` | `0x26` | -| `Map(K, V)` | `0x27` | -| `IPv4` | `0x28` | -| `IPv6` | `0x29` | -| `Variant(T1, ..., TN)` | `0x2A...` | -| `Dynamic(max_types=N)` | `0x2B` | -| `Custom type` (`Ring`, `Polygon`, etc) | `0x2C` | -| `Bool` | `0x2D` | -| `SimpleAggregateFunction(function_name(param_1, ..., param_N), arg_T1, ..., arg_TN)` | `0x2E......` (see [aggregate function parameter binary encoding](#aggregate-function-parameter-binary-encoding)) | -| `Nested(name1 T1, ..., nameN TN)` | `0x2F...` | +| ClickHouse data type | Binary encoding | +|-----------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `Nothing` | `0x00` | +| `UInt8` | `0x01` | +| `UInt16` | `0x02` | +| `UInt32` | `0x03` | +| `UInt64` | `0x04` | +| `UInt128` | `0x05` | +| `UInt256` | `0x06` | +| `Int8` | `0x07` | +| `Int16` | `0x08` | +| `Int32` | `0x09` | +| `Int64` | `0x0A` | +| `Int128` | `0x0B` | +| `Int256` | `0x0C` | +| `Float32` | `0x0D` | +| `Float64` | `0x0E` | +| `Date` | `0x0F` | +| `Date32` | `0x10` | +| `DateTime` | `0x11` | +| `DateTime(time_zone)` | `0x12` | +| `DateTime64(P)` | `0x13` | +| `DateTime64(P, time_zone)` | `0x14` | +| `String` | `0x15` | +| `FixedString(N)` | `0x16` | +| `Enum8` | `0x17...` | +| `Enum16` | `0x18...>` | +| `Decimal32(P, S)` | `0x19` | +| `Decimal64(P, S)` | `0x1A` | +| `Decimal128(P, S)` | `0x1B` | +| `Decimal256(P, S)` | `0x1C` | +| `UUID` | `0x1D` | +| `Array(T)` | `0x1E` | +| `Tuple(T1, ..., TN)` | `0x1F...` | +| `Tuple(name1 T1, ..., nameN TN)` | `0x20...` | +| `Set` | `0x21` | +| `Interval` | `0x22` (see [interval kind binary encoding](#interval-kind-binary-encoding)) | +| `Nullable(T)` | `0x23` | +| `Function` | `0x24...` | +| `AggregateFunction(function_name(param_1, ..., param_N), arg_T1, ..., arg_TN)` | `0x25......` (see [aggregate function parameter binary encoding](#aggregate-function-parameter-binary-encoding)) | +| `LowCardinality(T)` | `0x26` | +| `Map(K, V)` | `0x27` | +| `IPv4` | `0x28` | +| `IPv6` | `0x29` | +| `Variant(T1, ..., TN)` | `0x2A...` | +| `Dynamic(max_types=N)` | `0x2B` | +| `Custom type` (`Ring`, `Polygon`, etc) | `0x2C` | +| `Bool` | `0x2D` | +| `SimpleAggregateFunction(function_name(param_1, ..., param_N), arg_T1, ..., arg_TN)` | `0x2E......` (see [aggregate function parameter binary encoding](#aggregate-function-parameter-binary-encoding)) | +| `Nested(name1 T1, ..., nameN TN)` | `0x2F...` | +| `JSON(max_dynamic_paths=N, max_dynamic_types=M, path Type, SKIP skip_path, SKIP REGEXP skip_path_regexp)` | `0x30.........` | +For type `JSON` byte `uint8_serialization_version` indicates the version of the serialization. Right now the version is always 0 but can change in future if new arguments will be introduced for `JSON` type. ### Interval kind binary encoding diff --git a/docs/en/sql-reference/data-types/dynamic.md b/docs/en/sql-reference/data-types/dynamic.md index 8be81471377..f9befd166fe 100644 --- a/docs/en/sql-reference/data-types/dynamic.md +++ b/docs/en/sql-reference/data-types/dynamic.md @@ -14,7 +14,7 @@ To declare a column of `Dynamic` type, use the following syntax: Dynamic(max_types=N) ``` -Where `N` is an optional parameter between `1` and `255` indicating how many different data types can be stored inside a column with type `Dynamic` across single block of data that is stored separately (for example across single data part for MergeTree table). If this limit is exceeded, all new types will be converted to type `String`. Default value of `max_types` is `32`. +Where `N` is an optional parameter between `0` and `254` indicating how many different data types can be stored as separate subcolumns inside a column with type `Dynamic` across single block of data that is stored separately (for example across single data part for MergeTree table). If this limit is exceeded, all values with new types will be stored together in a special shared data structure in binary form. Default value of `max_types` is `32`. :::note The Dynamic data type is an experimental feature. To use it, set `allow_experimental_dynamic_type = 1`. @@ -224,41 +224,43 @@ SELECT d::Dynamic(max_types=5) as d2, dynamicType(d2) FROM test; └───────┴────────────────┘ ``` -If `K < N`, then the values with the rarest types are converted to `String`: +If `K < N`, then the values with the rarest types will be inserted into a single special subcolumn, but still will be accessible: ```text CREATE TABLE test (d Dynamic(max_types=4)) ENGINE = Memory; INSERT INTO test VALUES (NULL), (42), (43), ('42.42'), (true), ([1, 2, 3]); -SELECT d, dynamicType(d), d::Dynamic(max_types=2) as d2, dynamicType(d2) FROM test; +SELECT d, dynamicType(d), d::Dynamic(max_types=2) as d2, dynamicType(d2), isDynamicElementInSharedData(d2) FROM test; ``` ```text -┌─d───────┬─dynamicType(d)─┬─d2──────┬─dynamicType(d2)─┐ -│ ᴺᵁᴸᴸ │ None │ ᴺᵁᴸᴸ │ None │ -│ 42 │ Int64 │ 42 │ Int64 │ -│ 43 │ Int64 │ 43 │ Int64 │ -│ 42.42 │ String │ 42.42 │ String │ -│ true │ Bool │ true │ String │ -│ [1,2,3] │ Array(Int64) │ [1,2,3] │ String │ -└─────────┴────────────────┴─────────┴─────────────────┘ +┌─d───────┬─dynamicType(d)─┬─d2──────┬─dynamicType(d2)─┬─isDynamicElementInSharedData(d2)─┐ +│ ᴺᵁᴸᴸ │ None │ ᴺᵁᴸᴸ │ None │ false │ +│ 42 │ Int64 │ 42 │ Int64 │ false │ +│ 43 │ Int64 │ 43 │ Int64 │ false │ +│ 42.42 │ String │ 42.42 │ String │ false │ +│ true │ Bool │ true │ Bool │ true │ +│ [1,2,3] │ Array(Int64) │ [1,2,3] │ Array(Int64) │ true │ +└─────────┴────────────────┴─────────┴─────────────────┴──────────────────────────────────┘ ``` -If `K=1`, all types are converted to `String`: +Functions `isDynamicElementInSharedData` returns `true` for rows that are stored in a special shared data structure inside `Dynamic` and as we can see, resulting column contains only 2 types that are not stored in shared data structure. + +If `K=0`, all types will be inserted into single special subcolumn: ```text CREATE TABLE test (d Dynamic(max_types=4)) ENGINE = Memory; INSERT INTO test VALUES (NULL), (42), (43), ('42.42'), (true), ([1, 2, 3]); -SELECT d, dynamicType(d), d::Dynamic(max_types=1) as d2, dynamicType(d2) FROM test; +SELECT d, dynamicType(d), d::Dynamic(max_types=0) as d2, dynamicType(d2), isDynamicElementInSharedData(d2) FROM test; ``` ```text -┌─d───────┬─dynamicType(d)─┬─d2──────┬─dynamicType(d2)─┐ -│ ᴺᵁᴸᴸ │ None │ ᴺᵁᴸᴸ │ None │ -│ 42 │ Int64 │ 42 │ String │ -│ 43 │ Int64 │ 43 │ String │ -│ 42.42 │ String │ 42.42 │ String │ -│ true │ Bool │ true │ String │ -│ [1,2,3] │ Array(Int64) │ [1,2,3] │ String │ -└─────────┴────────────────┴─────────┴─────────────────┘ +┌─d───────┬─dynamicType(d)─┬─d2──────┬─dynamicType(d2)─┬─isDynamicElementInSharedData(d2)─┐ +│ ᴺᵁᴸᴸ │ None │ ᴺᵁᴸᴸ │ None │ false │ +│ 42 │ Int64 │ 42 │ Int64 │ true │ +│ 43 │ Int64 │ 43 │ Int64 │ true │ +│ 42.42 │ String │ 42.42 │ String │ true │ +│ true │ Bool │ true │ Bool │ true │ +│ [1,2,3] │ Array(Int64) │ [1,2,3] │ Array(Int64) │ true │ +└─────────┴────────────────┴─────────┴─────────────────┴──────────────────────────────────┘ ``` ## Reading Dynamic type from the data @@ -411,17 +413,17 @@ SELECT d, dynamicType(d) FROM test ORDER by d; ## Reaching the limit in number of different data types stored inside Dynamic -`Dynamic` data type can store only limited number of different data types inside. By default, this limit is 32, but you can change it in type declaration using syntax `Dynamic(max_types=N)` where N is between 1 and 255 (due to implementation details, it's impossible to have more than 255 different data types inside Dynamic). -When the limit is reached, all new data types inserted to `Dynamic` column will be casted to `String` and stored as `String` values. +`Dynamic` data type can store only limited number of different data types as separate subcolumns. By default, this limit is 32, but you can change it in type declaration using syntax `Dynamic(max_types=N)` where N is between 0 and 254 (due to implementation details, it's impossible to have more than 254 different data types that can be stored as separate subcolumns inside Dynamic). +When the limit is reached, all new data types inserted to `Dynamic` column will be inserted into a single shared data structure that stores values with different data types in binary form. Let's see what happens when the limit is reached in different scenarios. ### Reaching the limit during data parsing -During parsing of `Dynamic` values from the data, when the limit is reached for current block of data, all new values will be inserted as `String` values: +During parsing of `Dynamic` values from the data, when the limit is reached for current block of data, all new values will be inserted into shared data structure: ```sql -SELECT d, dynamicType(d) FROM format(JSONEachRow, 'd Dynamic(max_types=3)', ' +SELECT d, dynamicType(d), isDynamicElementInSharedData(d) FROM format(JSONEachRow, 'd Dynamic(max_types=3)', ' {"d" : 42} {"d" : [1, 2, 3]} {"d" : "Hello, World!"} @@ -432,22 +434,22 @@ SELECT d, dynamicType(d) FROM format(JSONEachRow, 'd Dynamic(max_types=3)', ' ``` ```text -┌─d──────────────────────────┬─dynamicType(d)─┐ -│ 42 │ Int64 │ -│ [1,2,3] │ Array(Int64) │ -│ Hello, World! │ String │ -│ 2020-01-01 │ String │ -│ ["str1", "str2", "str3"] │ String │ -│ {"a" : 1, "b" : [1, 2, 3]} │ String │ -└────────────────────────────┴────────────────┘ +┌─d──────────────────────┬─dynamicType(d)─────────────────┬─isDynamicElementInSharedData(d)─┐ +│ 42 │ Int64 │ false │ +│ [1,2,3] │ Array(Int64) │ false │ +│ Hello, World! │ String │ false │ +│ 2020-01-01 │ Date │ true │ +│ ['str1','str2','str3'] │ Array(String) │ true │ +│ (1,[1,2,3]) │ Tuple(a Int64, b Array(Int64)) │ true │ +└────────────────────────┴────────────────────────────────┴─────────────────────────────────┘ ``` -As we can see, after inserting 3 different data types `Int64`, `Array(Int64)` and `String` all new types were converted to `String`. +As we can see, after inserting 3 different data types `Int64`, `Array(Int64)` and `String` all new types were inserted into special shared data structure. ### During merges of data parts in MergeTree table engines -During merge of several data parts in MergeTree table the `Dynamic` column in the resulting data part can reach the limit of different data types inside and won't be able to store all types from source parts. -In this case ClickHouse chooses what types will remain after merge and what types will be casted to `String`. In most cases ClickHouse tries to keep the most frequent types and cast the rarest types to `String`, but it depends on the implementation. +During merge of several data parts in MergeTree table the `Dynamic` column in the resulting data part can reach the limit of different data types that can be stored in separate subcolumns inside and won't be able to store all types as subcolumns from source parts. +In this case ClickHouse chooses what types will remain as separate subcolumns after merge and what types will be inserted into shared data structure. In most cases ClickHouse tries to keep the most frequent types and store the rarest types in shared data structure, but it depends on the implementation. Let's see an example of such merge. First, let's create a table with `Dynamic` column, set the limit of different data types to `3` and insert values with `5` different types: @@ -463,17 +465,17 @@ INSERT INTO test SELECT number, 'str_' || toString(number) FROM numbers(1); Each insert will create a separate data pert with `Dynamic` column containing single type: ```sql -SELECT count(), dynamicType(d), _part FROM test GROUP BY _part, dynamicType(d) ORDER BY _part; +SELECT count(), dynamicType(d), isDynamicElementInSharedData(d), _part FROM test GROUP BY _part, dynamicType(d), isDynamicElementInSharedData(d) ORDER BY _part, count(); ``` ```text -┌─count()─┬─dynamicType(d)──────┬─_part─────┐ -│ 5 │ UInt64 │ all_1_1_0 │ -│ 4 │ Array(UInt64) │ all_2_2_0 │ -│ 3 │ Date │ all_3_3_0 │ -│ 2 │ Map(UInt64, UInt64) │ all_4_4_0 │ -│ 1 │ String │ all_5_5_0 │ -└─────────┴─────────────────────┴───────────┘ +┌─count()─┬─dynamicType(d)──────┬─isDynamicElementInSharedData(d)─┬─_part─────┐ +│ 5 │ UInt64 │ false │ all_1_1_0 │ +│ 4 │ Array(UInt64) │ false │ all_2_2_0 │ +│ 3 │ Date │ false │ all_3_3_0 │ +│ 2 │ Map(UInt64, UInt64) │ false │ all_4_4_0 │ +│ 1 │ String │ false │ all_5_5_0 │ +└─────────┴─────────────────────┴─────────────────────────────────┴───────────┘ ``` Now, let's merge all parts into one and see what will happen: @@ -481,18 +483,20 @@ Now, let's merge all parts into one and see what will happen: ```sql SYSTEM START MERGES test; OPTIMIZE TABLE test FINAL; -SELECT count(), dynamicType(d), _part FROM test GROUP BY _part, dynamicType(d) ORDER BY _part; +SELECT count(), dynamicType(d), isDynamicElementInSharedData(d), _part FROM test GROUP BY _part, dynamicType(d), isDynamicElementInSharedData(d) ORDER BY _part, count() desc; ``` ```text -┌─count()─┬─dynamicType(d)─┬─_part─────┐ -│ 5 │ UInt64 │ all_1_5_2 │ -│ 6 │ String │ all_1_5_2 │ -│ 4 │ Array(UInt64) │ all_1_5_2 │ -└─────────┴────────────────┴───────────┘ +┌─count()─┬─dynamicType(d)──────┬─isDynamicElementInSharedData(d)─┬─_part─────┐ +│ 5 │ UInt64 │ false │ all_1_5_2 │ +│ 4 │ Array(UInt64) │ false │ all_1_5_2 │ +│ 3 │ Date │ false │ all_1_5_2 │ +│ 2 │ Map(UInt64, UInt64) │ true │ all_1_5_2 │ +│ 1 │ String │ true │ all_1_5_2 │ +└─────────┴─────────────────────┴─────────────────────────────────┴───────────┘ ``` -As we can see, ClickHouse kept the most frequent types `UInt64` and `Array(UInt64)` and casted all other types to `String`. +As we can see, ClickHouse kept the most frequent types `UInt64` and `Array(UInt64)` as subcolumns and inserted all other types into shared data. ## JSONExtract functions with Dynamic @@ -509,22 +513,23 @@ SELECT JSONExtract('{"a" : [1, 2, 3]}', 'a', 'Dynamic') AS dynamic, dynamicType( ``` ```sql -SELECT JSONExtract('{"obj" : {"a" : 42, "b" : "Hello", "c" : [1,2,3]}}', 'obj', 'Map(String, Variant(UInt32, String, Array(UInt32)))') AS map_of_dynamics, mapApply((k, v) -> (k, variantType(v)), map_of_dynamics) AS map_of_dynamic_types``` +SELECT JSONExtract('{"obj" : {"a" : 42, "b" : "Hello", "c" : [1,2,3]}}', 'obj', 'Map(String, Dynamic)') AS map_of_dynamics, mapApply((k, v) -> (k, dynamicType(v)), map_of_dynamics) AS map_of_dynamic_types +``` ```text -┌─map_of_dynamics──────────────────┬─map_of_dynamic_types────────────────────────────┐ -│ {'a':42,'b':'Hello','c':[1,2,3]} │ {'a':'UInt32','b':'String','c':'Array(UInt32)'} │ -└──────────────────────────────────┴─────────────────────────────────────────────────┘ +┌─map_of_dynamics──────────────────┬─map_of_dynamic_types────────────────────────────────────┐ +│ {'a':42,'b':'Hello','c':[1,2,3]} │ {'a':'Int64','b':'String','c':'Array(Nullable(Int64))'} │ +└──────────────────────────────────┴─────────────────────────────────────────────────────────┘ ``` ```sql -SELECT JSONExtractKeysAndValues('{"a" : 42, "b" : "Hello", "c" : [1,2,3]}', 'Variant(UInt32, String, Array(UInt32))') AS dynamics, arrayMap(x -> (x.1, variantType(x.2)), dynamics) AS dynamic_types``` +SELECT JSONExtractKeysAndValues('{"a" : 42, "b" : "Hello", "c" : [1,2,3]}', 'Dynamic') AS dynamics, arrayMap(x -> (x.1, dynamicType(x.2)), dynamics) AS dynamic_types``` ``` ```text -┌─dynamics───────────────────────────────┬─dynamic_types─────────────────────────────────────────┐ -│ [('a',42),('b','Hello'),('c',[1,2,3])] │ [('a','UInt32'),('b','String'),('c','Array(UInt32)')] │ -└────────────────────────────────────────┴───────────────────────────────────────────────────────┘ +┌─dynamics───────────────────────────────┬─dynamic_types─────────────────────────────────────────────────┐ +│ [('a',42),('b','Hello'),('c',[1,2,3])] │ [('a','Int64'),('b','String'),('c','Array(Nullable(Int64))')] │ +└────────────────────────────────────────┴───────────────────────────────────────────────────────────────┘ ``` ### Binary output format diff --git a/docs/en/sql-reference/data-types/geo.md b/docs/en/sql-reference/data-types/geo.md index 7ffc7447d96..8ce53bb2ef2 100644 --- a/docs/en/sql-reference/data-types/geo.md +++ b/docs/en/sql-reference/data-types/geo.md @@ -52,6 +52,48 @@ Result: └───────────────────────────────┴───────────────┘ ``` +## LineString + +`LineString` is a line stored as an array of points: [Array](array.md)([Point](#point)). + +**Example** + +Query: + +```sql +CREATE TABLE geo_linestring (l LineString) ENGINE = Memory(); +INSERT INTO geo_linestring VALUES([(0, 0), (10, 0), (10, 10), (0, 10)]); +SELECT l, toTypeName(l) FROM geo_linestring; +``` +Result: + +``` text +┌─r─────────────────────────────┬─toTypeName(r)─┐ +│ [(0,0),(10,0),(10,10),(0,10)] │ LineString │ +└───────────────────────────────┴───────────────┘ +``` + +## MultiLineString + +`MultiLineString` is multiple lines stored as an array of `LineString`: [Array](array.md)([LineString](#linestring)). + +**Example** + +Query: + +```sql +CREATE TABLE geo_multilinestring (l MultiLineString) ENGINE = Memory(); +INSERT INTO geo_multilinestring VALUES([[(0, 0), (10, 0), (10, 10), (0, 10)], [(1, 1), (2, 2), (3, 3)]]); +SELECT l, toTypeName(l) FROM geo_multilinestring; +``` +Result: + +``` text +┌─l───────────────────────────────────────────────────┬─toTypeName(l)───┐ +│ [[(0,0),(10,0),(10,10),(0,10)],[(1,1),(2,2),(3,3)]] │ MultiLineString │ +└─────────────────────────────────────────────────────┴─────────────────┘ +``` + ## Polygon `Polygon` is a polygon with holes stored as an array of rings: [Array](array.md)([Ring](#ring)). First element of outer array is the outer shape of polygon and all the following elements are holes. diff --git a/docs/en/sql-reference/data-types/index.md b/docs/en/sql-reference/data-types/index.md index fcb0b60d022..2b89dd145e6 100644 --- a/docs/en/sql-reference/data-types/index.md +++ b/docs/en/sql-reference/data-types/index.md @@ -19,7 +19,8 @@ ClickHouse data types include: - **Boolean**: ClickHouse has a [`Boolean` type](./boolean.md) - **Strings**: [`String`](./string.md) and [`FixedString`](./fixedstring.md) - **Dates**: use [`Date`](./date.md) and [`Date32`](./date32.md) for days, and [`DateTime`](./datetime.md) and [`DateTime64`](./datetime64.md) for instances in time -- **JSON**: the [`JSON` object](./json.md) stores a JSON document in a single column +- **Object**: the [`Object`](./json.md) stores a JSON document in a single column (deprecated) +- **JSON**: the [`JSON` object](./newjson.md) stores a JSON document in a single column - **UUID**: a performant option for storing [`UUID` values](./uuid.md) - **Low cardinality types**: use an [`Enum`](./enum.md) when you have a handful of unique values, or use [`LowCardinality`](./lowcardinality.md) when you have up to 10,000 unique values of a column - **Arrays**: any column can be defined as an [`Array` of values](./array.md) diff --git a/docs/en/sql-reference/data-types/json.md b/docs/en/sql-reference/data-types/json.md index f218c8d0339..e48b308a620 100644 --- a/docs/en/sql-reference/data-types/json.md +++ b/docs/en/sql-reference/data-types/json.md @@ -13,7 +13,7 @@ keywords: [object, data type] Stores JavaScript Object Notation (JSON) documents in a single column. -`JSON` is an alias for `Object('json')`. +`JSON` can be used as an alias to `Object('json')` when setting [use_json_alias_for_old_object_type](../../operations/settings/settings.md#usejsonaliasforoldobjecttype) is enabled. ## Example @@ -79,5 +79,5 @@ SELECT * FROM json FORMAT JSONEachRow ## Related Content -- [Using JSON in ClickHouse](/docs/en/integrations/data-formats/json) +- [Using JSON in ClickHouse](/en/integrations/data-formats/json/overview) - [Getting Data Into ClickHouse - Part 2 - A JSON detour](https://clickhouse.com/blog/getting-data-into-clickhouse-part-2-json) diff --git a/docs/en/sql-reference/data-types/newjson.md b/docs/en/sql-reference/data-types/newjson.md new file mode 100644 index 00000000000..f7fc7e1498e --- /dev/null +++ b/docs/en/sql-reference/data-types/newjson.md @@ -0,0 +1,516 @@ +--- +slug: /en/sql-reference/data-types/newjson +sidebar_position: 63 +sidebar_label: JSON +keywords: [json, data type] +--- + +# JSON + +Stores JavaScript Object Notation (JSON) documents in a single column. + +:::note +This feature is experimental and is not production-ready. If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-formats/json/overview) instead. +If you want to use JSON type, set `allow_experimental_json_type = 1`. +::: + +To declare a column of `JSON` type, use the following syntax: + +``` sql + JSON(max_dynamic_paths=N, max_dynamic_types=M, some.path TypeName, SKIP path.to.skip, SKIP REGEXP 'paths_regexp') +``` +Where: +- `max_dynamic_paths` is an optional parameter indicating how many paths can be stored separately as subcolumns across single block of data that is stored separately (for example across single data part for MergeTree table). If this limit is exceeded, all other paths will be stored together in a single structure. Default value of `max_dynamic_paths` is `1024`. +- `max_dynamic_types` is an optional parameter between `1` and `255` indicating how many different data types can be stored inside a single path column with type `Dynamic` across single block of data that is stored separately (for example across single data part for MergeTree table). If this limit is exceeded, all new types will be converted to type `String`. Default value of `max_dynamic_types` is `32`. +- `some.path TypeName` is an optional type hint for particular path in the JSON. Such paths will be always stored as subcolumns with specified type. +- `SKIP path.to.skip` is an optional hint for particular path that should be skipped during JSON parsing. Such paths will never be stored in the JSON column. If specified path is a nested JSON object, the whole nested object will be skipped. +- `SKIP REGEXP 'path_regexp'` is an optional hint with a regular expression that is used to skip paths during JSON parsing. All paths that match this regular expression will never be stored in the JSON column. + +## Creating JSON + +Using `JSON` type in table column definition: + +```sql +CREATE TABLE test (json JSON) ENGINE = Memory; +INSERT INTO test VALUES ('{"a" : {"b" : 42}, "c" : [1, 2, 3]}'), ('{"f" : "Hello, World!"}'), ('{"a" : {"b" : 43, "e" : 10}, "c" : [4, 5, 6]}'); +SELECT json FROM test; +``` + +```text +┌─json────────────────────────────────────────┐ +│ {"a":{"b":"42"},"c":["1","2","3"]} │ +│ {"f":"Hello, World!"} │ +│ {"a":{"b":"43","e":"10"},"c":["4","5","6"]} │ +└─────────────────────────────────────────────┘ +``` + +```sql +CREATE TABLE test (json JSON(a.b UInt32, SKIP a.e)) ENGINE = Memory; +INSERT INTO test VALUES ('{"a" : {"b" : 42}, "c" : [1, 2, 3]}'), ('{"f" : "Hello, World!"}'), ('{"a" : {"b" : 43, "e" : 10}, "c" : [4, 5, 6]}'); +SELECT json FROM test; +``` + +```text +┌─json──────────────────────────────┐ +│ {"a":{"b":42},"c":[1,2,3]} │ +│ {"a":{"b":0},"f":"Hello, World!"} │ +│ {"a":{"b":43},"c":[4,5,6]} │ +└───────────────────────────────────┘ +``` + +Using CAST from 'String': + +```sql +SELECT '{"a" : {"b" : 42},"c" : [1, 2, 3], "d" : "Hello, World!"}'::JSON as json; +``` + +```text +┌─json───────────────────────────────────────────┐ +│ {"a":{"b":42},"c":[1,2,3],"d":"Hello, World!"} │ +└────────────────────────────────────────────────┘ +``` + +CAST from `JSON`, named `Tuple`, `Map` and `Object('json')` to `JSON` type will be supported later. + +## Reading JSON paths as subcolumns + +JSON type supports reading every path as a separate subcolumn. If type of the requested path was not specified in the JSON type declaration, the subcolumn of the path will always have type [Dynamic](/docs/en/sql-reference/data-types/dynamic.md). + +For example: + +```sql +CREATE TABLE test (json JSON(a.b UInt32, SKIP a.e)) ENGINE = Memory; +INSERT INTO test VALUES ('{"a" : {"b" : 42, "g" : 42.42}, "c" : [1, 2, 3], "d" : "2020-01-01"}'), ('{"f" : "Hello, World!", "d" : "2020-01-02"}'), ('{"a" : {"b" : 43, "e" : 10, "g" : 43.43}, "c" : [4, 5, 6]}'); +SELECT json FROM test; +``` + +```text +┌─json──────────────────────────────────────────────────┐ +│ {"a":{"b":42,"g":42.42},"c":[1,2,3],"d":"2020-01-01"} │ +│ {"a":{"b":0},"d":"2020-01-02","f":"Hello, World!"} │ +│ {"a":{"b":43,"g":43.43},"c":[4,5,6]} │ +└───────────────────────────────────────────────────────┘ +``` + +```sql +SELECT json.a.b, json.a.g, json.c, json.d FROM test; +``` + +```text +┌─json.a.b─┬─json.a.g─┬─json.c──┬─json.d─────┐ +│ 42 │ 42.42 │ [1,2,3] │ 2020-01-01 │ +│ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 2020-01-02 │ +│ 43 │ 43.43 │ [4,5,6] │ ᴺᵁᴸᴸ │ +└──────────┴──────────┴─────────┴────────────┘ +``` + +If the requested path wasn't found in the data, it will be filled with `NULL` values: + +```sql +SELECT json.non.existing.path FROM test; +``` + +```text +┌─json.non.existing.path─┐ +│ ᴺᵁᴸᴸ │ +│ ᴺᵁᴸᴸ │ +│ ᴺᵁᴸᴸ │ +└────────────────────────┘ +``` + +Let's check the data types of returned subcolumns: +```sql +SELECT toTypeName(json.a.b), toTypeName(json.a.g), toTypeName(json.c), toTypeName(json.d) FROM test; +``` + +```text +┌─toTypeName(json.a.b)─┬─toTypeName(json.a.g)─┬─toTypeName(json.c)─┬─toTypeName(json.d)─┐ +│ UInt32 │ Dynamic │ Dynamic │ Dynamic │ +│ UInt32 │ Dynamic │ Dynamic │ Dynamic │ +│ UInt32 │ Dynamic │ Dynamic │ Dynamic │ +└──────────────────────┴──────────────────────┴────────────────────┴────────────────────┘ +``` + +As we can see, for `a.b` the type is `UInt32` as we specified in the JSON type declaration, and for all other subcolumns the type is `Dynamic`. + +It is also possible to read subcolumns of a `Dynamic` type using special syntax `json.some.path.:TypeName`: + +```sql +select json.a.g.:Float64, dynamicType(json.a.g), json.d.:Date, dynamicType(json.d) FROM test; +``` + +```text +┌─json.a.g.:`Float64`─┬─dynamicType(json.a.g)─┬─json.d.:`Date`─┬─dynamicType(json.d)─┐ +│ 42.42 │ Float64 │ 2020-01-01 │ Date │ +│ ᴺᵁᴸᴸ │ None │ 2020-01-02 │ Date │ +│ 43.43 │ Float64 │ ᴺᵁᴸᴸ │ None │ +└─────────────────────┴───────────────────────┴────────────────┴─────────────────────┘ +``` + +`Dynamic` subcolumns can be casted to any data type. In this case the exception will be thrown if internal type inside `Dynamic` cannot be casted to the requested type: + +```sql +select json.a.g::UInt64 as uint FROM test; +``` + +```text +┌─uint─┐ +│ 42 │ +│ 0 │ +│ 43 │ +└──────┘ +``` + +```sql +select json.a.g::UUID as float FROM test; +``` + +```text +Received exception: +Code: 48. DB::Exception: Conversion between numeric types and UUID is not supported. Probably the passed UUID is unquoted: while executing 'FUNCTION CAST(__table1.json.a.g :: 2, 'UUID'_String :: 1) -> CAST(__table1.json.a.g, 'UUID'_String) UUID : 0'. (NOT_IMPLEMENTED) +``` + +## Reading JSON sub-objects as subcolumns + +JSON type supports reading nested objects as subcolumns with type `JSON` using special syntax `json.^some.path`: + +```sql +CREATE TABLE test (json JSON) ENGINE = Memory; +INSERT INTO test VALUES ('{"a" : {"b" : {"c" : 42, "g" : 42.42}}, "c" : [1, 2, 3], "d" : {"e" : {"f" : {"g" : "Hello, World", "h" : [1, 2, 3]}}}}'), ('{"f" : "Hello, World!", "d" : {"e" : {"f" : {"h" : [4, 5, 6]}}}}'), ('{"a" : {"b" : {"c" : 43, "e" : 10, "g" : 43.43}}, "c" : [4, 5, 6]}'); +SELECT json FROM test; +``` + +```text +┌─json────────────────────────────────────────────────────────────────────────────────────────┐ +│ {"a":{"b":{"c":42,"g":42.42}},"c":[1,2,3],"d":{"e":{"f":{"g":"Hello, World","h":[1,2,3]}}}} │ +│ {"d":{"e":{"f":{"h":[4,5,6]}}},"f":"Hello, World!"} │ +│ {"a":{"b":{"c":43,"e":10,"g":43.43}},"c":[4,5,6]} │ +└─────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +```sql +SELECT json.^a.b, json.^d.e.f FROM test; +``` + +```text +┌─json.^`a`.b───────────────┬─json.^`d`.e.f────────────────────┐ +│ {"c":42,"g":42.42} │ {"g":"Hello, World","h":[1,2,3]} │ +│ {} │ {"h":[4,5,6]} │ +│ {"c":43,"e":10,"g":43.43} │ {} │ +└───────────────────────────┴──────────────────────────────────┘ +``` + +:::note +Reading sub-objects as subcolumns may be inefficient, as this may require almost full scan of the JSON data. +::: + +## Types inference for paths + +During JSON parsing ClickHouse tries to detect the most appropriate data type for each JSON path. It works similar to [automatic schema inference from input data](/docs/en/interfaces/schema-inference.md) and controlled by the same settings: + +- [input_format_try_infer_integers](/docs/en/interfaces/schema-inference.md#inputformattryinferintegers) +- [input_format_try_infer_dates](/docs/en/interfaces/schema-inference.md#inputformattryinferdates) +- [input_format_try_infer_datetimes](/docs/en/interfaces/schema-inference.md#inputformattryinferdatetimes) +- [schema_inference_make_columns_nullable](/docs/en/interfaces/schema-inference.md#schemainferencemakecolumnsnullable) +- [input_format_json_try_infer_numbers_from_strings](/docs/en/interfaces/schema-inference.md#inputformatjsontryinfernumbersfromstrings) +- [input_format_json_infer_incomplete_types_as_strings](/docs/en/interfaces/schema-inference.md#inputformatjsoninferincompletetypesasstrings) +- [input_format_json_read_numbers_as_strings](/docs/en/interfaces/schema-inference.md#inputformatjsonreadnumbersasstrings) +- [input_format_json_read_bools_as_strings](/docs/en/interfaces/schema-inference.md#inputformatjsonreadboolsasstrings) +- [input_format_json_read_bools_as_numbers](/docs/en/interfaces/schema-inference.md#inputformatjsonreadboolsasnumbers) +- [input_format_json_read_arrays_as_strings](/docs/en/interfaces/schema-inference.md#inputformatjsonreadarraysasstrings) + +Let's see some examples: + +```sql +SELECT JSONAllPathsWithTypes('{"a" : "2020-01-01", "b" : "2020-01-01 10:00:00"}'::JSON) AS paths_with_types settings input_format_try_infer_dates=1, input_format_try_infer_datetimes=1; +``` + +```text +┌─paths_with_types─────────────────┐ +│ {'a':'Date','b':'DateTime64(9)'} │ +└──────────────────────────────────┘ +``` + +```sql +SELECT JSONAllPathsWithTypes('{"a" : "2020-01-01", "b" : "2020-01-01 10:00:00"}'::JSON) AS paths_with_types settings input_format_try_infer_dates=0, input_format_try_infer_datetimes=0; +``` + +```text +┌─paths_with_types────────────┐ +│ {'a':'String','b':'String'} │ +└─────────────────────────────┘ +``` + +```sql +SELECT JSONAllPathsWithTypes('{"a" : [1, 2, 3]}'::JSON) AS paths_with_types settings schema_inference_make_columns_nullable=1; +``` + +```text +┌─paths_with_types───────────────┐ +│ {'a':'Array(Nullable(Int64))'} │ +└────────────────────────────────┘ +``` + +```sql +SELECT JSONAllPathsWithTypes('{"a" : [1, 2, 3]}'::JSON) AS paths_with_types settings schema_inference_make_columns_nullable=0; +``` + +```text +┌─paths_with_types─────┐ +│ {'a':'Array(Int64)'} │ +└──────────────────────┘ +``` + +## Handling arrays of JSON objects + +JSON paths that contains an array of objects are parsed as type `Array(JSON)` and inserted into `Dynamic` column for this path. To read an array of objects you can extract it from `Dynamic` column as a subcolumn: + +```sql +CREATE TABLE test (json JSON) ENGINE = Memory; +INSERT INTO test VALUES +('{"a" : {"b" : [{"c" : 42, "d" : "Hello", "f" : [[{"g" : 42.42}]], "k" : {"j" : 1000}}, {"c" : 43}, {"e" : [1, 2, 3], "d" : "My", "f" : [[{"g" : 43.43, "h" : "2020-01-01"}]], "k" : {"j" : 2000}}]}}'), +('{"a" : {"b" : [1, 2, 3]}}'), +('{"a" : {"b" : [{"c" : 44, "f" : [[{"h" : "2020-01-02"}]]}, {"e" : [4, 5, 6], "d" : "World", "f" : [[{"g" : 44.44}]], "k" : {"j" : 3000}}]}}'); +SELECT json FROM test; +``` + +```text3 +┌─json────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ {"a":{"b":[{"c":"42","d":"Hello","f":[[{"g":42.42}]],"k":{"j":"1000"}},{"c":"43"},{"d":"My","e":["1","2","3"],"f":[[{"g":43.43,"h":"2020-01-01"}]],"k":{"j":"2000"}}]}} │ +│ {"a":{"b":["1","2","3"]}} │ +│ {"a":{"b":[{"c":"44","f":[[{"h":"2020-01-02"}]]},{"d":"World","e":["4","5","6"],"f":[[{"g":44.44}]],"k":{"j":"3000"}}]}} │ +└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +```sql +SELECT json.a.b, dynamicType(json.a.b) FROM test; +``` + +```text +┌─json.a.b──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─dynamicType(json.a.b)────────────────────────────────────┐ +│ ['{"c":"42","d":"Hello","f":[[{"g":42.42}]],"k":{"j":"1000"}}','{"c":"43"}','{"d":"My","e":["1","2","3"],"f":[[{"g":43.43,"h":"2020-01-01"}]],"k":{"j":"2000"}}'] │ Array(JSON(max_dynamic_types=16, max_dynamic_paths=256)) │ +│ [1,2,3] │ Array(Nullable(Int64)) │ +│ ['{"c":"44","f":[[{"h":"2020-01-02"}]]}','{"d":"World","e":["4","5","6"],"f":[[{"g":44.44}]],"k":{"j":"3000"}}'] │ Array(JSON(max_dynamic_types=16, max_dynamic_paths=256)) │ +└───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴──────────────────────────────────────────────────────────┘ +``` + +As you can notice, the `max_dynamic_types/max_dynamic_paths` parameters of the nested `JSON` type were reduced compared to the default values. It's needed to avoid number of subcolumns to grow uncontrolled on nested arrays of JSON objects. + +Let's try to read subcolumns from this nested `JSON` column: + +```sql +SELECT json.a.b.:`Array(JSON)`.c, json.a.b.:`Array(JSON)`.f, json.a.b.:`Array(JSON)`.d FROM test; +``` + +```text +┌─json.a.b.:`Array(JSON)`.c─┬─json.a.b.:`Array(JSON)`.f───────────────────────────────────┬─json.a.b.:`Array(JSON)`.d─┐ +│ [42,43,NULL] │ [[['{"g":42.42}']],NULL,[['{"g":43.43,"h":"2020-01-01"}']]] │ ['Hello',NULL,'My'] │ +│ [] │ [] │ [] │ +│ [44,NULL] │ [[['{"h":"2020-01-02"}']],[['{"g":44.44}']]] │ [NULL,'World'] │ +└───────────────────────────┴─────────────────────────────────────────────────────────────┴───────────────────────────┘ +``` + +We can avoid writing `Array(JSON)` subcolumn name using special syntax: + +```sql +SELECT json.a.b[].c, json.a.b[].f, json.a.b[].d FROM test; +``` + +```text +┌─json.a.b.:`Array(JSON)`.c─┬─json.a.b.:`Array(JSON)`.f───────────────────────────────────┬─json.a.b.:`Array(JSON)`.d─┐ +│ [42,43,NULL] │ [[['{"g":42.42}']],NULL,[['{"g":43.43,"h":"2020-01-01"}']]] │ ['Hello',NULL,'My'] │ +│ [] │ [] │ [] │ +│ [44,NULL] │ [[['{"h":"2020-01-02"}']],[['{"g":44.44}']]] │ [NULL,'World'] │ +└───────────────────────────┴─────────────────────────────────────────────────────────────┴───────────────────────────┘ +``` + +The number of `[]` after path indicates the array level. `json.path[][]` will be transformed to `json.path.:Array(Array(JSON))` + +Let's check the paths and types inside our `Array(JSON)`: + +```sql +SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(arrayJoin(json.a.b[]))) FROM test; +``` + +```text +┌─arrayJoin(JSONAllPathsWithTypes(arrayJoin(json.a.b.:`Array(JSON)`)))──┐ +│ ('c','Int64') │ +│ ('d','String') │ +│ ('f','Array(Array(JSON(max_dynamic_types=8, max_dynamic_paths=64)))') │ +│ ('k.j','Int64') │ +│ ('e','Array(Nullable(Int64))') │ +└───────────────────────────────────────────────────────────────────────┘ +``` + +Let's read subcolumns from `Array(JSON)` column: + +```sql +SELECT json.a.b[].c.:Int64, json.a.b[].f[][].g.:Float64, json.a.b[].f[][].h.:Date FROM test; +``` + +```text +┌─json.a.b.:`Array(JSON)`.c.:`Int64`─┬─json.a.b.:`Array(JSON)`.f.:`Array(Array(JSON))`.g.:`Float64`─┬─json.a.b.:`Array(JSON)`.f.:`Array(Array(JSON))`.h.:`Date`─┐ +│ [42,43,NULL] │ [[[42.42]],[],[[43.43]]] │ [[[NULL]],[],[['2020-01-01']]] │ +│ [] │ [] │ [] │ +│ [44,NULL] │ [[[NULL]],[[44.44]]] │ [[['2020-01-02']],[[NULL]]] │ +└────────────────────────────────────┴──────────────────────────────────────────────────────────────┴───────────────────────────────────────────────────────────┘ +``` + +We can also read sub-object subcolumns from nested `JSON` column: + +```sql +SELECT json.a.b[].^k FROM test +``` + +```text +┌─json.a.b.:`Array(JSON)`.^`k`─────────┐ +│ ['{"j":"1000"}','{}','{"j":"2000"}'] │ +│ [] │ +│ ['{}','{"j":"3000"}'] │ +└──────────────────────────────────────┘ +``` + +## Reading JSON type from the data + +All text formats (JSONEachRow, TSV, CSV, CustomSeparated, Values, etc) supports reading `JSON` type. + +Examples: + +```sql +SELECT json FROM format(JSONEachRow, 'json JSON(a.b.c UInt32, SKIP a.b.d, SKIP d.e, SKIP REGEXP \'b.*\')', ' +{"json" : {"a" : {"b" : {"c" : 1, "d" : [0, 1]}}, "b" : "2020-01-01", "c" : 42, "d" : {"e" : {"f" : ["s1", "s2"]}, "i" : [1, 2, 3]}}} +{"json" : {"a" : {"b" : {"c" : 2, "d" : [2, 3]}}, "b" : [1, 2, 3], "c" : null, "d" : {"e" : {"g" : 43}, "i" : [4, 5, 6]}}} +{"json" : {"a" : {"b" : {"c" : 3, "d" : [4, 5]}}, "b" : {"c" : 10}, "e" : "Hello, World!"}} +{"json" : {"a" : {"b" : {"c" : 4, "d" : [6, 7]}}, "c" : 43}} +{"json" : {"a" : {"b" : {"c" : 5, "d" : [8, 9]}}, "b" : {"c" : 11, "j" : [1, 2, 3]}, "d" : {"e" : {"f" : ["s3", "s4"], "g" : 44}, "h" : "2020-02-02 10:00:00"}}} +') +``` + +```text +┌─json──────────────────────────────────────────────────────────┐ +│ {"a":{"b":{"c":1}},"c":"42","d":{"i":["1","2","3"]}} │ +│ {"a":{"b":{"c":2}},"d":{"i":["4","5","6"]}} │ +│ {"a":{"b":{"c":3}},"e":"Hello, World!"} │ +│ {"a":{"b":{"c":4}},"c":"43"} │ +│ {"a":{"b":{"c":5}},"d":{"h":"2020-02-02 10:00:00.000000000"}} │ +└───────────────────────────────────────────────────────────────┘ +``` + +For text formats like CSV/TSV/etc `JSON` is parsed from a string containing JSON object + +```sql +SELECT json FROM format(TSV, 'json JSON(a.b.c UInt32, SKIP a.b.d, SKIP REGEXP \'b.*\')', +'{"a" : {"b" : {"c" : 1, "d" : [0, 1]}}, "b" : "2020-01-01", "c" : 42, "d" : {"e" : {"f" : ["s1", "s2"]}, "i" : [1, 2, 3]}} +{"a" : {"b" : {"c" : 2, "d" : [2, 3]}}, "b" : [1, 2, 3], "c" : null, "d" : {"e" : {"g" : 43}, "i" : [4, 5, 6]}} +{"a" : {"b" : {"c" : 3, "d" : [4, 5]}}, "b" : {"c" : 10}, "e" : "Hello, World!"} +{"a" : {"b" : {"c" : 4, "d" : [6, 7]}}, "c" : 43} +{"a" : {"b" : {"c" : 5, "d" : [8, 9]}}, "b" : {"c" : 11, "j" : [1, 2, 3]}, "d" : {"e" : {"f" : ["s3", "s4"], "g" : 44}, "h" : "2020-02-02 10:00:00"}}') +``` + +```text +┌─json──────────────────────────────────────────────────────────┐ +│ {"a":{"b":{"c":1}},"c":"42","d":{"i":["1","2","3"]}} │ +│ {"a":{"b":{"c":2}},"d":{"i":["4","5","6"]}} │ +│ {"a":{"b":{"c":3}},"e":"Hello, World!"} │ +│ {"a":{"b":{"c":4}},"c":"43"} │ +│ {"a":{"b":{"c":5}},"d":{"h":"2020-02-02 10:00:00.000000000"}} │ +└───────────────────────────────────────────────────────────────┘ +``` + +## Reaching the limit of dynamic paths inside JSON + +`JSON` data type can store only limited number of paths as separate subcolumns inside. By default, this limit is 1024, but you can change it in type declaration using parameter `max_dynamic_paths`. +When the limit is reached, all new paths inserted to `JSON` column will be stored in a single shared data structure. It's still possible to read such paths as subcolumns, but it will require reading the whole +shared data structure to extract the values of this path. This limit is needed to avoid the enormous number of different subcolumns that can make the table unusable. + +Let's see what happens when the limit is reached in different scenarios. + +### Reaching the limit during data parsing + +During parsing of `JSON` object from the data, when the limit is reached for current block of data, all new paths will be stored in a shared data structure. We can check it using introspection functions `JSONDynamicPaths, JSONSharedDataPaths`: + +```sql +SELECT json, JSONDynamicPaths(json), JSONSharedDataPaths(json) FROM format(JSONEachRow, 'json JSON(max_dynamic_paths=3)', ' +{"json" : {"a" : {"b" : 42}, "c" : [1, 2, 3]}} +{"json" : {"a" : {"b" : 43}, "d" : "2020-01-01"}} +{"json" : {"a" : {"b" : 44}, "c" : [4, 5, 6]}} +{"json" : {"a" : {"b" : 43}, "d" : "2020-01-02", "e" : "Hello", "f" : {"g" : 42.42}}} +{"json" : {"a" : {"b" : 43}, "c" : [7, 8, 9], "f" : {"g" : 43.43}, "h" : "World"}} +') +``` + +```text +┌─json───────────────────────────────────────────────────────────┬─JSONDynamicPaths(json)─┬─JSONSharedDataPaths(json)─┐ +│ {"a":{"b":"42"},"c":["1","2","3"]} │ ['a.b','c','d'] │ [] │ +│ {"a":{"b":"43"},"d":"2020-01-01"} │ ['a.b','c','d'] │ [] │ +│ {"a":{"b":"44"},"c":["4","5","6"]} │ ['a.b','c','d'] │ [] │ +│ {"a":{"b":"43"},"d":"2020-01-02","e":"Hello","f":{"g":42.42}} │ ['a.b','c','d'] │ ['e','f.g'] │ +│ {"a":{"b":"43"},"c":["7","8","9"],"f":{"g":43.43},"h":"World"} │ ['a.b','c','d'] │ ['f.g','h'] │ +└────────────────────────────────────────────────────────────────┴────────────────────────┴───────────────────────────┘ +``` + +As we can see, after inserting paths `e` and `f.g` the limit was reached and we inserted them into shared data structure. + +### During merges of data parts in MergeTree table engines + +During merge of several data parts in MergeTree table the `JSON` column in the resulting data part can reach the limit of dynamic paths won't be able to store all paths from source parts as subcolumns. +In this case ClickHouse chooses what paths will remain as subcolumns after merge and what types will be stored in the shared data structure. In most cases ClickHouse tries to keep paths that contains +the largest number of non-null values and move the rarest paths to the shared data structure, but it depends on the implementation. + +Let's see an example of such merge. First, let's create a table with `JSON` column, set the limit of dynamic paths to `3` and insert values with `5` different paths: + +```sql +CREATE TABLE test (id UInt64, json JSON(max_dynamic_paths=3)) engine=MergeTree ORDER BY id; +SYSTEM STOP MERGES test; +INSERT INTO test SELECT number, formatRow('JSONEachRow', number as a) FROM numbers(5); +INSERT INTO test SELECT number, formatRow('JSONEachRow', number as b) FROM numbers(4); +INSERT INTO test SELECT number, formatRow('JSONEachRow', number as c) FROM numbers(3); +INSERT INTO test SELECT number, formatRow('JSONEachRow', number as d) FROM numbers(2); +INSERT INTO test SELECT number, formatRow('JSONEachRow', number as e) FROM numbers(1); +``` + +Each insert will create a separate data pert with `JSON` column containing single path: +```sql +SELECT count(), JSONDynamicPaths(json) AS dynamic_paths, JSONSharedDataPaths(json) AS shared_data_paths, _part FROM test GROUP BY _part, dynamic_paths, shared_data_paths ORDER BY _part ASC +``` + +```text +┌─count()─┬─dynamic_paths─┬─shared_data_paths─┬─_part─────┐ +│ 5 │ ['a'] │ [] │ all_1_1_0 │ +│ 4 │ ['b'] │ [] │ all_2_2_0 │ +│ 3 │ ['c'] │ [] │ all_3_3_0 │ +│ 2 │ ['d'] │ [] │ all_4_4_0 │ +│ 1 │ ['e'] │ [] │ all_5_5_0 │ +└─────────┴───────────────┴───────────────────┴───────────┘ + +``` + +Now, let's merge all parts into one and see what will happen: + +```sql +SYSTEM START MERGES test; +OPTIMIZE TABLE test FINAL; +SELECT count(), dynamicType(d), _part FROM test GROUP BY _part, dynamicType(d) ORDER BY _part; +``` + +```text +┌─count()─┬─dynamic_paths─┬─shared_data_paths─┬─_part─────┐ +│ 1 │ ['a','b','c'] │ ['e'] │ all_1_5_2 │ +│ 2 │ ['a','b','c'] │ ['d'] │ all_1_5_2 │ +│ 12 │ ['a','b','c'] │ [] │ all_1_5_2 │ +└─────────┴───────────────┴───────────────────┴───────────┘ +``` + +As we can see, ClickHouse kept the most frequent paths `a`, `b` and `c` and moved paths `e` and `d` to shared data structure. + +## Introspection functions + +There are several functions that can help to inspect the content of the JSON column: [JSONAllPaths](../functions/json-functions.md#jsonallpaths), [JSONAllPathsWithTypes](../functions/json-functions.md#jsonallpathswithtypes), [JSONDynamicPaths](../functions/json-functions.md#jsondynamicpaths), [JSONDynamicPathsWithTypes](../functions/json-functions.md#jsondynamicpathswithtypes), [JSONSharedDataPaths](../functions/json-functions.md#jsonshareddatapaths), [JSONSharedDataPathsWithTypes](../functions/json-functions.md#jsonshareddatapathswithtypes). + +## Tips for better usage of the JSON type + +Before creating `JSON` column and loading data into it, consider the following tips: + +- Investigate your data and specify as many path hints with types as you can. It will make the storage and the reading much more efficient. +- Think about what paths you will need and what paths you will never need. Specify paths that you won't need in the SKIP section and SKIP REGEXP if needed. It will improve the storage. +- Don't set `max_dynamic_paths` parameter to very high values, it can make the storage and reading less efficient. diff --git a/docs/en/sql-reference/data-types/special-data-types/interval.md b/docs/en/sql-reference/data-types/special-data-types/interval.md index bedbcf0bd28..4ef1a7e6238 100644 --- a/docs/en/sql-reference/data-types/special-data-types/interval.md +++ b/docs/en/sql-reference/data-types/special-data-types/interval.md @@ -53,29 +53,28 @@ SELECT now() as current_date_time, current_date_time + INTERVAL 4 DAY └─────────────────────┴───────────────────────────────┘ ``` -Intervals with different types can’t be combined. You can’t use intervals like `4 DAY 1 HOUR`. Specify intervals in units that are smaller or equal to the smallest unit of the interval, for example, the interval `1 day and an hour` interval can be expressed as `25 HOUR` or `90000 SECOND`. - -You can’t perform arithmetical operations with `Interval`-type values, but you can add intervals of different types consequently to values in `Date` or `DateTime` data types. For example: +Also it is possible to use multiple intervals simultaneously: ``` sql -SELECT now() AS current_date_time, current_date_time + INTERVAL 4 DAY + INTERVAL 3 HOUR +SELECT now() AS current_date_time, current_date_time + (INTERVAL 4 DAY + INTERVAL 3 HOUR) ``` ``` text -┌───current_date_time─┬─plus(plus(now(), toIntervalDay(4)), toIntervalHour(3))─┐ -│ 2019-10-23 11:16:28 │ 2019-10-27 14:16:28 │ -└─────────────────────┴────────────────────────────────────────────────────────┘ +┌───current_date_time─┬─plus(current_date_time, plus(toIntervalDay(4), toIntervalHour(3)))─┐ +│ 2024-08-08 18:31:39 │ 2024-08-12 21:31:39 │ +└─────────────────────┴────────────────────────────────────────────────────────────────────┘ ``` -The following query causes an exception: +And to compare values with different intervals: ``` sql -select now() AS current_date_time, current_date_time + (INTERVAL 4 DAY + INTERVAL 3 HOUR) +SELECT toIntervalMicrosecond(3600000000) = toIntervalHour(1); ``` ``` text -Received exception from server (version 19.14.1): -Code: 43. DB::Exception: Received from localhost:9000. DB::Exception: Wrong argument types for function plus: if one argument is Interval, then another must be Date or DateTime.. +┌─less(toIntervalMicrosecond(179999999), toIntervalMinute(3))─┐ +│ 1 │ +└─────────────────────────────────────────────────────────────┘ ``` ## See Also diff --git a/docs/en/sql-reference/functions/geo/geohash.md b/docs/en/sql-reference/functions/geo/geohash.md index b6ac7a74092..ce2e3c43b3e 100644 --- a/docs/en/sql-reference/functions/geo/geohash.md +++ b/docs/en/sql-reference/functions/geo/geohash.md @@ -6,7 +6,7 @@ title: "Functions for Working with Geohash" ## Geohash -[Geohash](https://en.wikipedia.org/wiki/Geohash) is the geocode system, which subdivides Earth’s surface into buckets of grid shape and encodes each cell into a short string of letters and digits. It is a hierarchical data structure, so the longer is the geohash string, the more precise is the geographic location. +[Geohash](https://en.wikipedia.org/wiki/Geohash) is the geocode system, which subdivides Earth’s surface into buckets of grid shape and encodes each cell into a short string of letters and digits. It is a hierarchical data structure, so the longer the geohash string is, the more precise the geographic location will be. If you need to manually convert geographic coordinates to geohash strings, you can use [geohash.org](http://geohash.org/). @@ -14,26 +14,37 @@ If you need to manually convert geographic coordinates to geohash strings, you c Encodes latitude and longitude as a [geohash](#geohash)-string. +**Syntax** + ``` sql geohashEncode(longitude, latitude, [precision]) ``` **Input values** -- longitude - longitude part of the coordinate you want to encode. Floating in range`[-180°, 180°]` -- latitude - latitude part of the coordinate you want to encode. Floating in range `[-90°, 90°]` -- precision - Optional, length of the resulting encoded string, defaults to `12`. Integer in range `[1, 12]`. Any value less than `1` or greater than `12` is silently converted to `12`. +- `longitude` — Longitude part of the coordinate you want to encode. Floating in range`[-180°, 180°]`. [Float](../../data-types/float.md). +- `latitude` — Latitude part of the coordinate you want to encode. Floating in range `[-90°, 90°]`. [Float](../../data-types/float.md). +- `precision` (optional) — Length of the resulting encoded string. Defaults to `12`. Integer in the range `[1, 12]`. [Int8](../../data-types/int-uint.md). + +:::note +- All coordinate parameters must be of the same type: either `Float32` or `Float64`. +- For the `precision` parameter, any value less than `1` or greater than `12` is silently converted to `12`. +::: **Returned values** -- alphanumeric `String` of encoded coordinate (modified version of the base32-encoding alphabet is used). +- Alphanumeric string of the encoded coordinate (modified version of the base32-encoding alphabet is used). [String](../../data-types/string.md). **Example** +Query: + ``` sql SELECT geohashEncode(-5.60302734375, 42.593994140625, 0) AS res; ``` +Result: + ``` text ┌─res──────────┐ │ ezs42d000000 │ @@ -44,13 +55,19 @@ SELECT geohashEncode(-5.60302734375, 42.593994140625, 0) AS res; Decodes any [geohash](#geohash)-encoded string into longitude and latitude. +**Syntax** + +```sql +geohashDecode(hash_str) +``` + **Input values** -- encoded string - geohash-encoded string. +- `hash_str` — Geohash-encoded string. **Returned values** -- (longitude, latitude) - 2-tuple of `Float64` values of longitude and latitude. +- Tuple `(longitude, latitude)` of `Float64` values of longitude and latitude. [Tuple](../../data-types/tuple.md)([Float64](../../data-types/float.md)) **Example** diff --git a/docs/en/sql-reference/functions/geo/polygon.md b/docs/en/sql-reference/functions/geo/polygon.md index 25a7a1fac8e..be9e9810626 100644 --- a/docs/en/sql-reference/functions/geo/polygon.md +++ b/docs/en/sql-reference/functions/geo/polygon.md @@ -6,11 +6,13 @@ title: "Functions for Working with Polygons" ## WKT -Returns a WKT (Well Known Text) geometric object from various [Geo Data Types](../../data-types/geo.md). Supported WKT objects are: +Returns a WKT (Well Known Text) geometric object from various [Geo Data Types](../../data-types/geo.md). Supported WKT objects are: - POINT - POLYGON - MULTIPOLYGON +- LINESTRING +- MULTILINESTRING **Syntax** @@ -26,12 +28,16 @@ WKT(geo_data) - [Ring](../../data-types/geo.md#ring) - [Polygon](../../data-types/geo.md#polygon) - [MultiPolygon](../../data-types/geo.md#multipolygon) +- [LineString](../../data-types/geo.md#linestring) +- [MultiLineString](../../data-types/geo.md#multilinestring) **Returned value** - WKT geometric object `POINT` is returned for a Point. - WKT geometric object `POLYGON` is returned for a Polygon -- WKT geometric object `MULTIPOLYGON` is returned for a MultiPolygon. +- WKT geometric object `MULTIPOLYGON` is returned for a MultiPolygon. +- WKT geometric object `LINESTRING` is returned for a LineString. +- WKT geometric object `MULTILINESTRING` is returned for a MultiLineString. **Examples** @@ -84,7 +90,7 @@ SELECT ### Input parameters -String starting with `MULTIPOLYGON` +String starting with `MULTIPOLYGON` ### Returned value @@ -170,6 +176,34 @@ SELECT readWKTLineString('LINESTRING (1 1, 2 2, 3 3, 1 1)'); [(1,1),(2,2),(3,3),(1,1)] ``` +## readWKTMultiLineString + +Parses a Well-Known Text (WKT) representation of a MultiLineString geometry and returns it in the internal ClickHouse format. + +### Syntax + +```sql +readWKTMultiLineString(wkt_string) +``` + +### Arguments + +- `wkt_string`: The input WKT string representing a MultiLineString geometry. + +### Returned value + +The function returns a ClickHouse internal representation of the multilinestring geometry. + +### Example + +```sql +SELECT readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3), (4 4, 5 5, 6 6))'); +``` + +```response +[[(1,1),(2,2),(3,3)],[(4,4),(5,5),(6,6)]] +``` + ## readWKTRing Parses a Well-Known Text (WKT) representation of a Polygon geometry and returns a ring (closed linestring) in the internal ClickHouse format. @@ -219,7 +253,7 @@ UInt8, 0 for false, 1 for true ## polygonsDistanceSpherical -Calculates the minimal distance between two points where one point belongs to the first polygon and the second to another polygon. Spherical means that coordinates are interpreted as coordinates on a pure and ideal sphere, which is not true for the Earth. Using this type of coordinate system speeds up execution, but of course is not precise. +Calculates the minimal distance between two points where one point belongs to the first polygon and the second to another polygon. Spherical means that coordinates are interpreted as coordinates on a pure and ideal sphere, which is not true for the Earth. Using this type of coordinate system speeds up execution, but of course is not precise. ### Example diff --git a/docs/en/sql-reference/functions/json-functions.md b/docs/en/sql-reference/functions/json-functions.md index 7bff6a6cba5..26fe888ab49 100644 --- a/docs/en/sql-reference/functions/json-functions.md +++ b/docs/en/sql-reference/functions/json-functions.md @@ -1155,3 +1155,207 @@ SELECT jsonMergePatch('{"a":1}', '{"name": "joey"}', '{"name": "tom"}', '{"name" │ {"a":1,"name":"zoey"} │ └───────────────────────┘ ``` + +### JSONAllPaths + +Returns the list of all paths stored in each row in [JSON](../data-types/newjson.md) column. + +**Syntax** + +``` sql +JSONAllPaths(json) +``` + +**Arguments** + +- `json` — [JSON](../data-types/newjson.md). + +**Returned value** + +- An array of paths. [Array(String)](../data-types/array.md). + +**Example** + +``` sql +CREATE TABLE test (json JSON(max_dynamic_paths=1)) ENGINE = Memory; +INSERT INTO test FORMAT JSONEachRow {"json" : {"a" : 42}}, {"json" : {"b" : "Hello"}}, {"json" : {"a" : [1, 2, 3], "c" : "2020-01-01"}} +SELECT json, JSONAllPaths(json) FROM test; +``` + +```text +┌─json─────────────────────────────────┬─JSONAllPaths(json)─┐ +│ {"a":"42"} │ ['a'] │ +│ {"b":"Hello"} │ ['b'] │ +│ {"a":["1","2","3"],"c":"2020-01-01"} │ ['a','c'] │ +└──────────────────────────────────────┴────────────────────┘ +``` + +### JSONAllPathsWithTypes + +Returns the map of all paths and their data types stored in each row in [JSON](../data-types/newjson.md) column. + +**Syntax** + +``` sql +JSONAllPathsWithTypes(json) +``` + +**Arguments** + +- `json` — [JSON](../data-types/newjson.md). + +**Returned value** + +- An array of paths. [Map(String, String)](../data-types/array.md). + +**Example** + +``` sql +CREATE TABLE test (json JSON(max_dynamic_paths=1)) ENGINE = Memory; +INSERT INTO test FORMAT JSONEachRow {"json" : {"a" : 42}}, {"json" : {"b" : "Hello"}}, {"json" : {"a" : [1, 2, 3], "c" : "2020-01-01"}} +SELECT json, JSONAllPathsWithTypes(json) FROM test; +``` + +```text +┌─json─────────────────────────────────┬─JSONAllPathsWithTypes(json)───────────────┐ +│ {"a":"42"} │ {'a':'Int64'} │ +│ {"b":"Hello"} │ {'b':'String'} │ +│ {"a":["1","2","3"],"c":"2020-01-01"} │ {'a':'Array(Nullable(Int64))','c':'Date'} │ +└──────────────────────────────────────┴───────────────────────────────────────────┘ +``` + +### JSONDynamicPaths + +Returns the list of dynamic paths that are stored as separate subcolumns in [JSON](../data-types/newjson.md) column. + +**Syntax** + +``` sql +JSONDynamicPaths(json) +``` + +**Arguments** + +- `json` — [JSON](../data-types/newjson.md). + +**Returned value** + +- An array of paths. [Array(String)](../data-types/array.md). + +**Example** + +``` sql +CREATE TABLE test (json JSON(max_dynamic_paths=1)) ENGINE = Memory; +INSERT INTO test FORMAT JSONEachRow {"json" : {"a" : 42}}, {"json" : {"b" : "Hello"}}, {"json" : {"a" : [1, 2, 3], "c" : "2020-01-01"}} +SELECT json, JSONDynamicPaths(json) FROM test; +``` + +```text +┌─json─────────────────────────────────┬─JSONDynamicPaths(json)─┐ +| {"a":"42"} │ ['a'] │ +│ {"b":"Hello"} │ [] │ +│ {"a":["1","2","3"],"c":"2020-01-01"} │ ['a'] │ +└──────────────────────────────────────┴────────────────────────┘ +``` + +### JSONDynamicPathsWithTypes + +Returns the map of dynamic paths that are stored as separate subcolumns and their types in each row in [JSON](../data-types/newjson.md) column. + +**Syntax** + +``` sql +JSONAllPathsWithTypes(json) +``` + +**Arguments** + +- `json` — [JSON](../data-types/newjson.md). + +**Returned value** + +- An array of paths. [Map(String, String)](../data-types/array.md). + +**Example** + +``` sql +CREATE TABLE test (json JSON(max_dynamic_paths=1)) ENGINE = Memory; +INSERT INTO test FORMAT JSONEachRow {"json" : {"a" : 42}}, {"json" : {"b" : "Hello"}}, {"json" : {"a" : [1, 2, 3], "c" : "2020-01-01"}} +SELECT json, JSONDynamicPathsWithTypes(json) FROM test; +``` + +```text +┌─json─────────────────────────────────┬─JSONDynamicPathsWithTypes(json)─┐ +│ {"a":"42"} │ {'a':'Int64'} │ +│ {"b":"Hello"} │ {} │ +│ {"a":["1","2","3"],"c":"2020-01-01"} │ {'a':'Array(Nullable(Int64))'} │ +└──────────────────────────────────────┴─────────────────────────────────┘ +``` + +### JSONSharedDataPaths + +Returns the list of paths that are stored in shared data structure in [JSON](../data-types/newjson.md) column. + +**Syntax** + +``` sql +JSONSharedDataPaths(json) +``` + +**Arguments** + +- `json` — [JSON](../data-types/newjson.md). + +**Returned value** + +- An array of paths. [Array(String)](../data-types/array.md). + +**Example** + +``` sql +CREATE TABLE test (json JSON(max_dynamic_paths=1)) ENGINE = Memory; +INSERT INTO test FORMAT JSONEachRow {"json" : {"a" : 42}}, {"json" : {"b" : "Hello"}}, {"json" : {"a" : [1, 2, 3], "c" : "2020-01-01"}} +SELECT json, JSONSharedDataPaths(json) FROM test; +``` + +```text +┌─json─────────────────────────────────┬─JSONSharedDataPaths(json)─┐ +│ {"a":"42"} │ [] │ +│ {"b":"Hello"} │ ['b'] │ +│ {"a":["1","2","3"],"c":"2020-01-01"} │ ['c'] │ +└──────────────────────────────────────┴───────────────────────────┘ +``` + +### JSONSharedDataPathsWithTypes + +Returns the map of paths that are stored in shared data structure and their types in each row in [JSON](../data-types/newjson.md) column. + +**Syntax** + +``` sql +JSONSharedDataPathsWithTypes(json) +``` + +**Arguments** + +- `json` — [JSON](../data-types/newjson.md). + +**Returned value** + +- An array of paths. [Map(String, String)](../data-types/array.md). + +**Example** + +``` sql +CREATE TABLE test (json JSON(max_dynamic_paths=1)) ENGINE = Memory; +INSERT INTO test FORMAT JSONEachRow {"json" : {"a" : 42}}, {"json" : {"b" : "Hello"}}, {"json" : {"a" : [1, 2, 3], "c" : "2020-01-01"}} +SELECT json, JSONSharedDataPathsWithTypes(json) FROM test; +``` + +```text +┌─json─────────────────────────────────┬─JSONSharedDataPathsWithTypes(json)─┐ +│ {"a":"42"} │ {} │ +│ {"b":"Hello"} │ {'b':'String'} │ +│ {"a":["1","2","3"],"c":"2020-01-01"} │ {'c':'Date'} │ +└──────────────────────────────────────┴────────────────────────────────────┘ +``` diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index afd779ca8cd..1b50104da52 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -4189,3 +4189,94 @@ Result: │ 32 │ └─────────────────────────────┘ ``` + +## getSubcolumn + +Takes a table expression or identifier and constant string with the name of the sub-column, and returns the requested sub-column extracted from the expression. + +**Syntax** + +```sql +getSubcolumn(col_name, subcol_name) +``` + +**Arguments** + +- `col_name` — Table expression or identifier. [Expression](../syntax.md/#expressions), [Identifier](../syntax.md/#identifiers). +- `subcol_name` — The name of the sub-column. [String](../data-types/string.md). + +**Returned value** + +- Returns the extracted sub-column. + +**Example** + +Query: + +```sql +CREATE TABLE t_arr (arr Array(Tuple(subcolumn1 UInt32, subcolumn2 String))) ENGINE = MergeTree ORDER BY tuple(); +INSERT INTO t_arr VALUES ([(1, 'Hello'), (2, 'World')]), ([(3, 'This'), (4, 'is'), (5, 'subcolumn')]); +SELECT getSubcolumn(arr, 'subcolumn1'), getSubcolumn(arr, 'subcolumn2') FROM t_arr; +``` + +Result: + +```response + ┌─getSubcolumn(arr, 'subcolumn1')─┬─getSubcolumn(arr, 'subcolumn2')─┐ +1. │ [1,2] │ ['Hello','World'] │ +2. │ [3,4,5] │ ['This','is','subcolumn'] │ + └─────────────────────────────────┴─────────────────────────────────┘ +``` + +## getTypeSerializationStreams + +Enumerates stream paths of a data type. + +:::note +This function is intended for use by developers. +::: + +**Syntax** + +```sql +getTypeSerializationStreams(col) +``` + +**Arguments** + +- `col` — Column or string representation of a data-type from which the data type will be detected. + +**Returned value** + +- Returns an array with all the serialization sub-stream paths.[Array](../data-types/array.md)([String](../data-types/string.md)). + +**Examples** + +Query: + +```sql +SELECT getTypeSerializationStreams(tuple('a', 1, 'b', 2)); +``` + +Result: + +```response + ┌─getTypeSerializationStreams(('a', 1, 'b', 2))─────────────────────────────────────────────────────────────────────────┐ +1. │ ['{TupleElement(1), Regular}','{TupleElement(2), Regular}','{TupleElement(3), Regular}','{TupleElement(4), Regular}'] │ + └───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +Query: + +```sql +SELECT getTypeSerializationStreams('Map(String, Int64)'); +``` + +Result: + +```response + ┌─getTypeSerializationStreams('Map(String, Int64)')────────────────────────────────────────────────────────────────┐ +1. │ ['{ArraySizes}','{ArrayElements, TupleElement(keys), Regular}','{ArrayElements, TupleElement(values), Regular}'] │ + └──────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md index 177790c983e..0cc6b0b27d5 100644 --- a/docs/en/sql-reference/functions/string-replace-functions.md +++ b/docs/en/sql-reference/functions/string-replace-functions.md @@ -8,6 +8,78 @@ sidebar_label: Replacing in Strings [General strings functions](string-functions.md) and [functions for searching in strings](string-search-functions.md) are described separately. +## overlay + +Replace part of the string `input` with another string `replace`, starting at the 1-based index `offset`. + +**Syntax** + +```sql +overlay(s, replace, offset[, length]) +``` + +**Parameters** + +- `input`: A string type [String](../data-types/string.md). +- `replace`: A string type [String](../data-types/string.md). +- `offset`: An integer type [Int](../data-types/int-uint.md). If `offset` is negative, it is counted from the end of the `input` string. +- `length`: Optional. An integer type [Int](../data-types/int-uint.md). `length` specifies the length of the snippet within input to be replaced. If `length` is not specified, the number of bytes removed from `input` equals the length of `replace`; otherwise `length` bytes are removed. + +**Returned value** + +- A [String](../data-types/string.md) data type value. + +**Example** + +```sql +SELECT overlay('ClickHouse SQL', 'CORE', 12) AS res; +``` + +Result: + +```text +┌─res─────────────┐ +│ ClickHouse CORE │ +└─────────────────┘ +``` + +## overlayUTF8 + +Replace part of the string `input` with another string `replace`, starting at the 1-based index `offset`. + +Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. + +**Syntax** + +```sql +overlayUTF8(s, replace, offset[, length]) +``` + +**Parameters** + +- `s`: A string type [String](../data-types/string.md). +- `replace`: A string type [String](../data-types/string.md). +- `offset`: An integer type [Int](../data-types/int-uint.md). If `offset` is negative, it is counted from the end of the `input` string. +- `length`: Optional. An integer type [Int](../data-types/int-uint.md). `length` specifies the length of the snippet within input to be replaced. If `length` is not specified, the number of characters removed from `input` equals the length of `replace`; otherwise `length` characters are removed. + +**Returned value** + +- A [String](../data-types/string.md) data type value. + +**Example** + +```sql +SELECT overlayUTF8('ClickHouse是一款OLAP数据库', '开源', 12, 2) AS res; +``` + +Result: + +```text +┌─res────────────────────────┐ +│ ClickHouse是开源OLAP数据库 │ +└────────────────────────────┘ +``` + ## replaceOne Replaces the first occurrence of the substring `pattern` in `haystack` by the `replacement` string. diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index a2b6e496319..edd04580f27 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -64,9 +64,8 @@ toInt8(expr) - `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). Supported arguments: -- Values of type (U)Int8/16/32/64/128/256. +- Values or string representations of type (U)Int8/16/32/64/128/256. - Values of type Float32/64. -- String representations of (U)Int8/16/32/128/256. Unsupported arguments: - String representations of Float32/64 values, including `NaN` and `Inf`. @@ -95,7 +94,7 @@ SELECT toInt8(-8), toInt8(-8.8), toInt8('-8') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -156,7 +155,7 @@ Query: SELECT toInt8OrZero('-8'), toInt8OrZero('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -216,7 +215,7 @@ Query: SELECT toInt8OrNull('-8'), toInt8OrNull('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -251,9 +250,8 @@ toInt8OrDefault(expr[, default]) - `default` (optional) — The default value to return if parsing to type `Int8` is unsuccessful. [Int8](../data-types/int-uint.md). Supported arguments: -- Values of type (U)Int8/16/32/64/128/256. +- Values or string representations of type (U)Int8/16/32/64/128/256. - Values of type Float32/64. -- String representations of (U)Int8/16/32/128/256. Arguments for which the default value is returned: - String representations of Float32/64 values, including `NaN` and `Inf`. @@ -281,7 +279,7 @@ Query: SELECT toInt8OrDefault('-8', CAST('-1', 'Int8')), toInt8OrDefault('abc', CAST('-1', 'Int8')) -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -314,9 +312,8 @@ toInt16(expr) - `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). Supported arguments: -- Values of type (U)Int8/16/32/64/128/256. +- Values or string representations of type (U)Int8/16/32/64/128/256. - Values of type Float32/64. -- String representations of (U)Int8/16/32/128/256. Unsupported arguments: - String representations of Float32/64 values, including `NaN` and `Inf`. @@ -345,7 +342,7 @@ SELECT toInt16(-16), toInt16(-16.16), toInt16('-16') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -406,7 +403,7 @@ Query: SELECT toInt16OrZero('-16'), toInt16OrZero('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -466,7 +463,7 @@ Query: SELECT toInt16OrNull('-16'), toInt16OrNull('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -501,9 +498,8 @@ toInt16OrDefault(expr[, default]) - `default` (optional) — The default value to return if parsing to type `Int16` is unsuccessful. [Int16](../data-types/int-uint.md). Supported arguments: -- Values of type (U)Int8/16/32/64/128/256. +- Values or string representations of type (U)Int8/16/32/64/128/256. - Values of type Float32/64. -- String representations of (U)Int8/16/32/128/256. Arguments for which the default value is returned: - String representations of Float32/64 values, including `NaN` and `Inf`. @@ -531,7 +527,7 @@ Query: SELECT toInt16OrDefault('-16', CAST('-1', 'Int16')), toInt16OrDefault('abc', CAST('-1', 'Int16')) -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -564,9 +560,8 @@ toInt32(expr) - `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). Supported arguments: -- Values of type (U)Int8/16/32/64/128/256. +- Values or string representations of type (U)Int8/16/32/64/128/256. - Values of type Float32/64. -- String representations of (U)Int8/16/32/128/256. Unsupported arguments: - String representations of Float32/64 values, including `NaN` and `Inf`. @@ -595,7 +590,7 @@ SELECT toInt32(-32), toInt32(-32.32), toInt32('-32') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -656,7 +651,7 @@ Query: SELECT toInt32OrZero('-32'), toInt32OrZero('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -715,7 +710,7 @@ Query: SELECT toInt32OrNull('-32'), toInt32OrNull('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -750,9 +745,8 @@ toInt32OrDefault(expr[, default]) - `default` (optional) — The default value to return if parsing to type `Int32` is unsuccessful. [Int32](../data-types/int-uint.md). Supported arguments: -- Values of type (U)Int8/16/32/64/128/256. +- Values or string representations of type (U)Int8/16/32/64/128/256. - Values of type Float32/64. -- String representations of (U)Int8/16/32/128/256. Arguments for which the default value is returned: - String representations of Float32/64 values, including `NaN` and `Inf`. @@ -780,7 +774,7 @@ Query: SELECT toInt32OrDefault('-32', CAST('-1', 'Int32')), toInt32OrDefault('abc', CAST('-1', 'Int32')) -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -813,9 +807,8 @@ toInt64(expr) - `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). Supported arguments: -- Values of type (U)Int8/16/32/64/128/256. +- Values or string representations of type (U)Int8/16/32/64/128/256. - Values of type Float32/64. -- String representations of (U)Int8/16/32/128/256. Unsupported types: - String representations of Float32/64 values, including `NaN` and `Inf`. @@ -844,7 +837,7 @@ SELECT toInt64(-64), toInt64(-64.64), toInt64('-64') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -905,7 +898,7 @@ Query: SELECT toInt64OrZero('-64'), toInt64OrZero('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -965,7 +958,7 @@ Query: SELECT toInt64OrNull('-64'), toInt64OrNull('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -1000,9 +993,8 @@ toInt64OrDefault(expr[, default]) - `default` (optional) — The default value to return if parsing to type `Int64` is unsuccessful. [Int64](../data-types/int-uint.md). Supported arguments: -- Values of type (U)Int8/16/32/64/128/256. +- Values or string representations of type (U)Int8/16/32/64/128/256. - Values of type Float32/64. -- String representations of (U)Int8/16/32/128/256. Arguments for which the default value is returned: - String representations of Float32/64 values, including `NaN` and `Inf`. @@ -1030,7 +1022,7 @@ Query: SELECT toInt64OrDefault('-64', CAST('-1', 'Int64')), toInt64OrDefault('abc', CAST('-1', 'Int64')) -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -1063,9 +1055,8 @@ toInt128(expr) - `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). Supported arguments: -- Values of type (U)Int8/16/32/64/128/256. +- Values or string representations of type (U)Int8/16/32/64/128/256. - Values of type Float32/64. -- String representations of (U)Int8/16/32/128/256. Unsupported arguments: - String representations of Float32/64 values, including `NaN` and `Inf`. @@ -1093,7 +1084,7 @@ SELECT toInt128(-128), toInt128(-128.8), toInt128('-128') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -1154,7 +1145,7 @@ Query: SELECT toInt128OrZero('-128'), toInt128OrZero('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -1214,7 +1205,7 @@ Query: SELECT toInt128OrNull('-128'), toInt128OrNull('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -1279,7 +1270,7 @@ Query: SELECT toInt128OrDefault('-128', CAST('-1', 'Int128')), toInt128OrDefault('abc', CAST('-1', 'Int128')) -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -1312,9 +1303,8 @@ toInt256(expr) - `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). Supported arguments: -- Values of type (U)Int8/16/32/64/128/256. +- Values or string representations of type (U)Int8/16/32/64/128/256. - Values of type Float32/64. -- String representations of (U)Int8/16/32/128/256. Unsupported arguments: - String representations of Float32/64 values, including `NaN` and `Inf`. @@ -1342,7 +1332,7 @@ SELECT toInt256(-256), toInt256(-256.256), toInt256('-256') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -1403,7 +1393,7 @@ Query: SELECT toInt256OrZero('-256'), toInt256OrZero('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -1463,7 +1453,7 @@ Query: SELECT toInt256OrNull('-256'), toInt256OrNull('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -1498,9 +1488,8 @@ toInt256OrDefault(expr[, default]) - `default` (optional) — The default value to return if parsing to type `Int256` is unsuccessful. [Int256](../data-types/int-uint.md). Supported arguments: -- Values of type (U)Int8/16/32/64/128/256. +- Values or string representations of type (U)Int8/16/32/64/128/256. - Values of type Float32/64. -- String representations of (U)Int8/16/32/128/256. Arguments for which the default value is returned: - String representations of Float32/64 values, including `NaN` and `Inf` @@ -1528,7 +1517,7 @@ Query: SELECT toInt256OrDefault('-256', CAST('-1', 'Int256')), toInt256OrDefault('abc', CAST('-1', 'Int256')) -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -1546,7 +1535,7 @@ toInt256OrDefault('abc', CAST('-1', 'Int256')): -1 - [`toInt256OrZero`](#toint256orzero). - [`toInt256OrNull`](#toint256ornull). -# toUInt8 +## toUInt8 Converts an input value to a value of type [`UInt8`](../data-types/int-uint.md). Throws an exception in case of an error. @@ -1561,9 +1550,8 @@ toUInt8(expr) - `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). Supported arguments: -- Values of type (U)Int8/16/32/64/128/256. +- Values or string representations of type (U)Int8/16/32/64/128/256. - Values of type Float32/64. -- String representations of (U)Int8/16/32/128/256. Unsupported arguments: - String representations of Float32/64 values, including `NaN` and `Inf`. @@ -1592,7 +1580,7 @@ SELECT toUInt8(8), toUInt8(8.8), toUInt8('8') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -1653,7 +1641,7 @@ Query: SELECT toUInt8OrZero('-8'), toUInt8OrZero('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -1713,7 +1701,7 @@ Query: SELECT toUInt8OrNull('8'), toUInt8OrNull('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -1748,9 +1736,8 @@ toUInt8OrDefault(expr[, default]) - `default` (optional) — The default value to return if parsing to type `UInt8` is unsuccessful. [UInt8](../data-types/int-uint.md). Supported arguments: -- Values of type (U)Int8/16/32/64/128/256. +- Values or string representations of type (U)Int8/16/32/64/128/256. - Values of type Float32/64. -- String representations of (U)Int8/16/32/128/256. Arguments for which the default value is returned: - String representations of Float32/64 values, including `NaN` and `Inf`. @@ -1778,7 +1765,7 @@ Query: SELECT toUInt8OrDefault('8', CAST('0', 'UInt8')), toUInt8OrDefault('abc', CAST('0', 'UInt8')) -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -1811,9 +1798,8 @@ toUInt16(expr) - `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). Supported arguments: -- Values of type (U)Int8/16/32/64/128/256. +- Values or string representations of type (U)Int8/16/32/64/128/256. - Values of type Float32/64. -- String representations of (U)Int8/16/32/128/256. Unsupported arguments: - String representations of Float32/64 values, including `NaN` and `Inf`. @@ -1842,7 +1828,7 @@ SELECT toUInt16(16), toUInt16(16.16), toUInt16('16') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -1903,7 +1889,7 @@ Query: SELECT toUInt16OrZero('16'), toUInt16OrZero('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -1963,7 +1949,7 @@ Query: SELECT toUInt16OrNull('16'), toUInt16OrNull('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -1998,9 +1984,8 @@ toUInt16OrDefault(expr[, default]) - `default` (optional) — The default value to return if parsing to type `UInt16` is unsuccessful. [UInt16](../data-types/int-uint.md). Supported arguments: -- Values of type (U)Int8/16/32/64/128/256. +- Values or string representations of type (U)Int8/16/32/64/128/256. - Values of type Float32/64. -- String representations of (U)Int8/16/32/128/256. Arguments for which the default value is returned: - String representations of Float32/64 values, including `NaN` and `Inf`. @@ -2028,7 +2013,7 @@ Query: SELECT toUInt16OrDefault('16', CAST('0', 'UInt16')), toUInt16OrDefault('abc', CAST('0', 'UInt16')) -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -2061,9 +2046,8 @@ toUInt32(expr) - `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). Supported arguments: -- Values of type (U)Int8/16/32/64/128/256. +- Values or string representations of type (U)Int8/16/32/64/128/256. - Values of type Float32/64. -- String representations of (U)Int8/16/32/128/256. Unsupported arguments: - String representations of Float32/64 values, including `NaN` and `Inf`. @@ -2092,7 +2076,7 @@ SELECT toUInt32(32), toUInt32(32.32), toUInt32('32') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -2154,7 +2138,7 @@ Query: SELECT toUInt32OrZero('32'), toUInt32OrZero('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -2214,7 +2198,7 @@ Query: SELECT toUInt32OrNull('32'), toUInt32OrNull('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -2249,9 +2233,8 @@ toUInt32OrDefault(expr[, default]) - `default` (optional) — The default value to return if parsing to type `UInt32` is unsuccessful. [UInt32](../data-types/int-uint.md). Supported arguments: -- Values of type (U)Int8/16/32/64/128/256. +- Values or string representations of type (U)Int8/16/32/64/128/256. - Values of type Float32/64. -- String representations of (U)Int8/16/32/128/256. Arguments for which the default value is returned: - String representations of Float32/64 values, including `NaN` and `Inf`. @@ -2279,7 +2262,7 @@ Query: SELECT toUInt32OrDefault('32', CAST('0', 'UInt32')), toUInt32OrDefault('abc', CAST('0', 'UInt32')) -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -2312,9 +2295,8 @@ toUInt64(expr) - `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). Supported arguments: -- Values of type (U)Int8/16/32/64/128/256. +- Values or string representations of type (U)Int8/16/32/64/128/256. - Values of type Float32/64. -- String representations of (U)Int8/16/32/128/256. Unsupported types: - String representations of Float32/64 values, including `NaN` and `Inf`. @@ -2343,7 +2325,7 @@ SELECT toUInt64(64), toUInt64(64.64), toUInt64('64') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -2362,7 +2344,7 @@ toUInt64('64'): 64 - [`toUInt64OrNull`](#touint64ornull). - [`toUInt64OrDefault`](#touint64ordefault). -## toInt64OrZero +## toUInt64OrZero Like [`toUInt64`](#touint64), this function converts an input value to a value of type [UInt64](../data-types/int-uint.md) but returns `0` in case of an error. @@ -2404,7 +2386,7 @@ Query: SELECT toUInt64OrZero('64'), toUInt64OrZero('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -2464,7 +2446,7 @@ Query: SELECT toUInt64OrNull('64'), toUInt64OrNull('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -2499,9 +2481,8 @@ toUInt64OrDefault(expr[, default]) - `defauult` (optional) — The default value to return if parsing to type `UInt64` is unsuccessful. [UInt64](../data-types/int-uint.md). Supported arguments: -- Values of type (U)Int8/16/32/64/128/256. +- Values or string representations of type (U)Int8/16/32/64/128/256. - Values of type Float32/64. -- String representations of (U)Int8/16/32/128/256. Arguments for which the default value is returned: - String representations of Float32/64 values, including `NaN` and `Inf`. @@ -2529,7 +2510,7 @@ Query: SELECT toUInt64OrDefault('64', CAST('0', 'UInt64')), toUInt64OrDefault('abc', CAST('0', 'UInt64')) -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -2562,9 +2543,8 @@ toUInt128(expr) - `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). Supported arguments: -- Values of type (U)Int8/16/32/64/128/256. +- Values or string representations of type (U)Int8/16/32/64/128/256. - Values of type Float32/64. -- String representations of (U)Int8/16/32/128/256. Unsupported arguments: - String representations of Float32/64 values, including `NaN` and `Inf`. @@ -2592,7 +2572,7 @@ SELECT toUInt128(128), toUInt128(128.8), toUInt128('128') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -2653,7 +2633,7 @@ Query: SELECT toUInt128OrZero('128'), toUInt128OrZero('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -2713,7 +2693,7 @@ Query: SELECT toUInt128OrNull('128'), toUInt128OrNull('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -2778,7 +2758,7 @@ Query: SELECT toUInt128OrDefault('128', CAST('0', 'UInt128')), toUInt128OrDefault('abc', CAST('0', 'UInt128')) -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -2811,9 +2791,8 @@ toUInt256(expr) - `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). Supported arguments: -- Values of type (U)Int8/16/32/64/128/256. +- Values or string representations of type (U)Int8/16/32/64/128/256. - Values of type Float32/64. -- String representations of (U)Int8/16/32/128/256. Unsupported arguments: - String representations of Float32/64 values, including `NaN` and `Inf`. @@ -2841,7 +2820,7 @@ SELECT toUInt256(256), toUInt256(256.256), toUInt256('256') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -2902,7 +2881,7 @@ Query: SELECT toUInt256OrZero('256'), toUInt256OrZero('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -2962,7 +2941,7 @@ Query: SELECT toUInt256OrNull('256'), toUInt256OrNull('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -2997,9 +2976,8 @@ toUInt256OrDefault(expr[, default]) - `default` (optional) — The default value to return if parsing to type `UInt256` is unsuccessful. [UInt256](../data-types/int-uint.md). Supported arguments: -- Values of type (U)Int8/16/32/64/128/256. +- Values or string representations of type (U)Int8/16/32/64/128/256. - Values of type Float32/64. -- String representations of (U)Int8/16/32/128/256. Arguments for which the default value is returned: - String representations of Float32/64 values, including `NaN` and `Inf` @@ -3027,7 +3005,7 @@ Query: SELECT toUInt256OrDefault('-256', CAST('0', 'UInt256')), toUInt256OrDefault('abc', CAST('0', 'UInt256')) -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -3954,175 +3932,1022 @@ SELECT toDateTime64('2019-01-01 00:00:00', 3, 'Asia/Istanbul') AS value, toTypeN ## toDateTime64OrDefault -## toDecimal(32\|64\|128\|256) +## toDecimal32 -Converts `value` to the [Decimal](../data-types/decimal.md) data type with precision of `S`. The `value` can be a number or a string. The `S` (scale) parameter specifies the number of decimal places. +Converts an input value to a value of type [`Decimal(9, S)`](../data-types/decimal.md) with scale of `S`. Throws an exception in case of an error. -- `toDecimal32(value, S)` -- `toDecimal64(value, S)` -- `toDecimal128(value, S)` -- `toDecimal256(value, S)` +**Syntax** -## toDecimal(32\|64\|128\|256)OrNull - -Converts an input string to a [Nullable(Decimal(P,S))](../data-types/decimal.md) data type value. This family of functions includes: - -- `toDecimal32OrNull(expr, S)` — Results in `Nullable(Decimal32(S))` data type. -- `toDecimal64OrNull(expr, S)` — Results in `Nullable(Decimal64(S))` data type. -- `toDecimal128OrNull(expr, S)` — Results in `Nullable(Decimal128(S))` data type. -- `toDecimal256OrNull(expr, S)` — Results in `Nullable(Decimal256(S))` data type. - -These functions should be used instead of `toDecimal*()` functions, if you prefer to get a `NULL` value instead of an exception in the event of an input value parsing error. +```sql +toDecimal32(expr, S) +``` **Arguments** -- `expr` — [Expression](../syntax.md/#syntax-expressions), returns a value in the [String](../data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. -- `S` — Scale, the number of decimal places in the resulting value. +- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). +- `S` — Scale parameter between 0 and 9, specifying how many digits the fractional part of a number can have. [UInt8](../data-types/int-uint.md). + +Supported arguments: +- Values or string representations of type (U)Int8/16/32/64/128/256. +- Values or string representations of type Float32/64. + +Unsupported arguments: +- Values or string representations of Float32/64 values `NaN` and `Inf` (case-insensitive). +- String representations of binary and hexadecimal values, e.g. `SELECT toDecimal32('0xc0fe', 1);`. + +:::note +An overflow can occur if the value of `expr` exceeds the bounds of `Decimal32`: `( -1 * 10^(9 - S), 1 * 10^(9 - S) )`. +Excessive digits in a fraction are discarded (not rounded). +Excessive digits in the integer part will lead to an exception. +::: **Returned value** -A value in the `Nullable(Decimal(P,S))` data type. The value contains: +- Value of type `Decimal(9, S)`. [Decimal32(S)](../data-types/int-uint.md). -- Number with `S` decimal places, if ClickHouse interprets the input string as a number. -- `NULL`, if ClickHouse can’t interpret the input string as a number or if the input number contains more than `S` decimal places. - -**Examples** +**Example** Query: -``` sql -SELECT toDecimal32OrNull(toString(-1.111), 5) AS val, toTypeName(val); +```sql +SELECT + toDecimal32(2, 1) AS a, toTypeName(a) AS type_a, + toDecimal32(4.2, 2) AS b, toTypeName(b) AS type_b, + toDecimal32('4.2', 3) AS c, toTypeName(c) AS type_c +FORMAT Vertical; ``` Result: ```response -┌────val─┬─toTypeName(toDecimal32OrNull(toString(-1.111), 5))─┐ -│ -1.111 │ Nullable(Decimal(9, 5)) │ -└────────┴────────────────────────────────────────────────────┘ +Row 1: +────── +a: 2 +type_a: Decimal(9, 1) +b: 4.2 +type_b: Decimal(9, 2) +c: 4.2 +type_c: Decimal(9, 3) ``` -Query: +**See also** -``` sql -SELECT toDecimal32OrNull(toString(-1.111), 2) AS val, toTypeName(val); +- [`toDecimal32OrZero`](#todecimal32orzero). +- [`toDecimal32OrNull`](#todecimal32ornull). +- [`toDecimal32OrDefault`](#todecimal32ordefault). + +## toDecimal32OrZero + +Like [`toDecimal32`](#todecimal32), this function converts an input value to a value of type [Decimal(9, S)](../data-types/decimal.md) but returns `0` in case of an error. + +**Syntax** + +```sql +toDecimal32OrZero(expr, S) ``` -Result: - -```response -┌──val─┬─toTypeName(toDecimal32OrNull(toString(-1.111), 2))─┐ -│ ᴺᵁᴸᴸ │ Nullable(Decimal(9, 2)) │ -└──────┴────────────────────────────────────────────────────┘ -``` - - -## toDecimal(32\|64\|128\|256)OrDefault - -Converts an input string to a [Decimal(P,S)](../data-types/decimal.md) data type value. This family of functions includes: - -- `toDecimal32OrDefault(expr, S)` — Results in `Decimal32(S)` data type. -- `toDecimal64OrDefault(expr, S)` — Results in `Decimal64(S)` data type. -- `toDecimal128OrDefault(expr, S)` — Results in `Decimal128(S)` data type. -- `toDecimal256OrDefault(expr, S)` — Results in `Decimal256(S)` data type. - -These functions should be used instead of `toDecimal*()` functions, if you prefer to get a default value instead of an exception in the event of an input value parsing error. - **Arguments** -- `expr` — [Expression](../syntax.md/#syntax-expressions), returns a value in the [String](../data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. -- `S` — Scale, the number of decimal places in the resulting value. +- `expr` — A String representation of a number. [String](../data-types/string.md). +- `S` — Scale parameter between 0 and 9, specifying how many digits the fractional part of a number can have. [UInt8](../data-types/int-uint.md). + +Supported arguments: +- String representations of type (U)Int8/16/32/64/128/256. +- String representations of type Float32/64. + +Unsupported arguments: +- String representations of Float32/64 values `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toDecimal32OrZero('0xc0fe', 1);`. + +:::note +An overflow can occur if the value of `expr` exceeds the bounds of `Decimal32`: `( -1 * 10^(9 - S), 1 * 10^(9 - S) )`. +Excessive digits in a fraction are discarded (not rounded). +Excessive digits in the integer part will lead to an error. +::: **Returned value** -A value in the `Decimal(P,S)` data type. The value contains: - -- Number with `S` decimal places, if ClickHouse interprets the input string as a number. -- Default `Decimal(P,S)` data type value, if ClickHouse can’t interpret the input string as a number or if the input number contains more than `S` decimal places. - -**Examples** - -Query: - -``` sql -SELECT toDecimal32OrDefault(toString(-1.111), 5) AS val, toTypeName(val); -``` - -Result: - -```response -┌────val─┬─toTypeName(toDecimal32OrDefault(toString(-1.111), 5))─┐ -│ -1.111 │ Decimal(9, 5) │ -└────────┴───────────────────────────────────────────────────────┘ -``` - -Query: - -``` sql -SELECT toDecimal32OrDefault(toString(-1.111), 2) AS val, toTypeName(val); -``` - -Result: - -```response -┌─val─┬─toTypeName(toDecimal32OrDefault(toString(-1.111), 2))─┐ -│ 0 │ Decimal(9, 2) │ -└─────┴───────────────────────────────────────────────────────┘ -``` - -## toDecimal(32\|64\|128\|256)OrZero - -Converts an input value to the [Decimal(P,S)](../data-types/decimal.md) data type. This family of functions includes: - -- `toDecimal32OrZero( expr, S)` — Results in `Decimal32(S)` data type. -- `toDecimal64OrZero( expr, S)` — Results in `Decimal64(S)` data type. -- `toDecimal128OrZero( expr, S)` — Results in `Decimal128(S)` data type. -- `toDecimal256OrZero( expr, S)` — Results in `Decimal256(S)` data type. - -These functions should be used instead of `toDecimal*()` functions, if you prefer to get a `0` value instead of an exception in the event of an input value parsing error. - -**Arguments** - -- `expr` — [Expression](../syntax.md/#syntax-expressions), returns a value in the [String](../data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. -- `S` — Scale, the number of decimal places in the resulting value. - -**Returned value** - -A value in the `Nullable(Decimal(P,S))` data type. The value contains: - -- Number with `S` decimal places, if ClickHouse interprets the input string as a number. -- 0 with `S` decimal places, if ClickHouse can’t interpret the input string as a number or if the input number contains more than `S` decimal places. +- Value of type `Decimal(9, S)` if successful, otherwise `0` with `S` decimal places. [Decimal32(S)](../data-types/decimal.md). **Example** Query: ``` sql -SELECT toDecimal32OrZero(toString(-1.111), 5) AS val, toTypeName(val); +SELECT + toDecimal32OrZero(toString(-1.111), 5) AS a, + toTypeName(a), + toDecimal32OrZero(toString('Inf'), 5) as b, + toTypeName(b) +FORMAT Vertical; ``` Result: ```response -┌────val─┬─toTypeName(toDecimal32OrZero(toString(-1.111), 5))─┐ -│ -1.111 │ Decimal(9, 5) │ -└────────┴────────────────────────────────────────────────────┘ +Row 1: +────── +a: -1.111 +toTypeName(a): Decimal(9, 5) +b: 0 +toTypeName(b): Decimal(9, 5) ``` +**See also** + +- [`toDecimal32`](#todecimal32). +- [`toDecimal32OrNull`](#todecimal32ornull). +- [`toDecimal32OrDefault`](#todecimal32ordefault). + +## toDecimal32OrNull + +Like [`toDecimal32`](#todecimal32), this function converts an input value to a value of type [Nullable(Decimal(9, S))](../data-types/decimal.md) but returns `0` in case of an error. + +**Syntax** + +```sql +toDecimal32OrNull(expr, S) +``` + +**Arguments** + +- `expr` — A String representation of a number. [String](../data-types/string.md). +- `S` — Scale parameter between 0 and 9, specifying how many digits the fractional part of a number can have. [UInt8](../data-types/int-uint.md). + +Supported arguments: +- String representations of type (U)Int8/16/32/64/128/256. +- String representations of type Float32/64. + +Unsupported arguments: +- String representations of Float32/64 values `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toDecimal32OrNull('0xc0fe', 1);`. + +:::note +An overflow can occur if the value of `expr` exceeds the bounds of `Decimal32`: `( -1 * 10^(9 - S), 1 * 10^(9 - S) )`. +Excessive digits in a fraction are discarded (not rounded). +Excessive digits in the integer part will lead to an error. +::: + +**Returned value** + +- Value of type `Nullable(Decimal(9, S))` if successful, otherwise value `NULL` of the same type. [Decimal32(S)](../data-types/decimal.md). + +**Examples** + Query: ``` sql -SELECT toDecimal32OrZero(toString(-1.111), 2) AS val, toTypeName(val); +SELECT + toDecimal32OrNull(toString(-1.111), 5) AS a, + toTypeName(a), + toDecimal32OrNull(toString('Inf'), 5) as b, + toTypeName(b) +FORMAT Vertical; ``` Result: ```response -┌──val─┬─toTypeName(toDecimal32OrZero(toString(-1.111), 2))─┐ -│ 0.00 │ Decimal(9, 2) │ -└──────┴────────────────────────────────────────────────────┘ +Row 1: +────── +a: -1.111 +toTypeName(a): Nullable(Decimal(9, 5)) +b: ᴺᵁᴸᴸ +toTypeName(b): Nullable(Decimal(9, 5)) ``` +**See also** + +- [`toDecimal32`](#todecimal32). +- [`toDecimal32OrZero`](#todecimal32orzero). +- [`toDecimal32OrDefault`](#todecimal32ordefault). + +## toDecimal32OrDefault + +Like [`toDecimal32`](#todecimal32), this function converts an input value to a value of type [Decimal(9, S)](../data-types/decimal.md) but returns the default value in case of an error. + +**Syntax** + +```sql +toDecimal32OrDefault(expr, S[, default]) +``` + +**Arguments** + +- `expr` — A String representation of a number. [String](../data-types/string.md). +- `S` — Scale parameter between 0 and 9, specifying how many digits the fractional part of a number can have. [UInt8](../data-types/int-uint.md). +- `default` (optional) — The default value to return if parsing to type `Decimal32(S)` is unsuccessful. [Decimal32(S)](../data-types/decimal.md). + +Supported arguments: +- String representations of type (U)Int8/16/32/64/128/256. +- String representations of type Float32/64. + +Unsupported arguments: +- String representations of Float32/64 values `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toDecimal32OrDefault('0xc0fe', 1);`. + +:::note +An overflow can occur if the value of `expr` exceeds the bounds of `Decimal32`: `( -1 * 10^(9 - S), 1 * 10^(9 - S) )`. +Excessive digits in a fraction are discarded (not rounded). +Excessive digits in the integer part will lead to an error. +::: + +**Returned value** + +- Value of type `Decimal(9, S)` if successful, otherwise returns the default value if passed or `0` if not. [Decimal32(S)](../data-types/decimal.md). + +**Examples** + +Query: + +``` sql +SELECT + toDecimal32OrDefault(toString(0.0001), 5) AS a, + toTypeName(a), + toDecimal32OrDefault('Inf', 0, CAST('-1', 'Decimal32(0)')) AS b, + toTypeName(b) +FORMAT Vertical; +``` + +Result: + +```response +Row 1: +────── +a: 0.0001 +toTypeName(a): Decimal(9, 5) +b: -1 +toTypeName(b): Decimal(9, 0) +``` + +**See also** + +- [`toDecimal32`](#todecimal32). +- [`toDecimal32OrZero`](#todecimal32orzero). +- [`toDecimal32OrNull`](#todecimal32ornull). + +## toDecimal64 + +Converts an input value to a value of type [`Decimal(18, S)`](../data-types/decimal.md) with scale of `S`. Throws an exception in case of an error. + +**Syntax** + +```sql +toDecimal64(expr, S) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). +- `S` — Scale parameter between 0 and 18, specifying how many digits the fractional part of a number can have. [UInt8](../data-types/int-uint.md). + +Supported arguments: +- Values or string representations of type (U)Int8/16/32/64/128/256. +- Values or string representations of type Float32/64. + +Unsupported arguments: +- Values or string representations of Float32/64 values `NaN` and `Inf` (case-insensitive). +- String representations of binary and hexadecimal values, e.g. `SELECT toDecimal64('0xc0fe', 1);`. + +:::note +An overflow can occur if the value of `expr` exceeds the bounds of `Decimal64`: `( -1 * 10^(18 - S), 1 * 10^(18 - S) )`. +Excessive digits in a fraction are discarded (not rounded). +Excessive digits in the integer part will lead to an exception. +::: + +**Returned value** + +- Value of type `Decimal(18, S)`. [Decimal64(S)](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT + toDecimal64(2, 1) AS a, toTypeName(a) AS type_a, + toDecimal64(4.2, 2) AS b, toTypeName(b) AS type_b, + toDecimal64('4.2', 3) AS c, toTypeName(c) AS type_c +FORMAT Vertical; +``` + +Result: + +```response +Row 1: +────── +a: 2 +type_a: Decimal(18, 1) +b: 4.2 +type_b: Decimal(18, 2) +c: 4.2 +type_c: Decimal(18, 3) +``` + +**See also** + +- [`toDecimal64OrZero`](#todecimal64orzero). +- [`toDecimal64OrNull`](#todecimal64ornull). +- [`toDecimal64OrDefault`](#todecimal64ordefault). + +## toDecimal64OrZero + +Like [`toDecimal64`](#todecimal64), this function converts an input value to a value of type [Decimal(18, S)](../data-types/decimal.md) but returns `0` in case of an error. + +**Syntax** + +```sql +toDecimal64OrZero(expr, S) +``` + +**Arguments** + +- `expr` — A String representation of a number. [String](../data-types/string.md). +- `S` — Scale parameter between 0 and 18, specifying how many digits the fractional part of a number can have. [UInt8](../data-types/int-uint.md). + +Supported arguments: +- String representations of type (U)Int8/16/32/64/128/256. +- String representations of type Float32/64. + +Unsupported arguments: +- String representations of Float32/64 values `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toDecimal64OrZero('0xc0fe', 1);`. + +:::note +An overflow can occur if the value of `expr` exceeds the bounds of `Decimal64`: `( -1 * 10^(18 - S), 1 * 10^(18 - S) )`. +Excessive digits in a fraction are discarded (not rounded). +Excessive digits in the integer part will lead to an error. +::: + +**Returned value** + +- Value of type `Decimal(18, S)` if successful, otherwise `0` with `S` decimal places. [Decimal64(S)](../data-types/decimal.md). + +**Example** + +Query: + +``` sql +SELECT + toDecimal64OrZero(toString(0.0001), 18) AS a, + toTypeName(a), + toDecimal64OrZero(toString('Inf'), 18) as b, + toTypeName(b) +FORMAT Vertical; +``` + +Result: + +```response +Row 1: +────── +a: 0.0001 +toTypeName(a): Decimal(18, 18) +b: 0 +toTypeName(b): Decimal(18, 18) +``` + +**See also** + +- [`toDecimal64`](#todecimal64). +- [`toDecimal64OrNull`](#todecimal64ornull). +- [`toDecimal64OrDefault`](#todecimal64ordefault). + +## toDecimal64OrNull + +Like [`toDecimal64`](#todecimal64), this function converts an input value to a value of type [Nullable(Decimal(18, S))](../data-types/decimal.md) but returns `0` in case of an error. + +**Syntax** + +```sql +toDecimal64OrNull(expr, S) +``` + +**Arguments** + +- `expr` — A String representation of a number. [String](../data-types/string.md). +- `S` — Scale parameter between 0 and 18, specifying how many digits the fractional part of a number can have. [UInt8](../data-types/int-uint.md). + +Supported arguments: +- String representations of type (U)Int8/16/32/64/128/256. +- String representations of type Float32/64. + +Unsupported arguments: +- String representations of Float32/64 values `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toDecimal64OrNull('0xc0fe', 1);`. + +:::note +An overflow can occur if the value of `expr` exceeds the bounds of `Decimal64`: `( -1 * 10^(18 - S), 1 * 10^(18 - S) )`. +Excessive digits in a fraction are discarded (not rounded). +Excessive digits in the integer part will lead to an error. +::: + +**Returned value** + +- Value of type `Nullable(Decimal(18, S))` if successful, otherwise value `NULL` of the same type. [Decimal64(S)](../data-types/decimal.md). + +**Examples** + +Query: + +``` sql +SELECT + toDecimal64OrNull(toString(0.0001), 18) AS a, + toTypeName(a), + toDecimal64OrNull(toString('Inf'), 18) as b, + toTypeName(b) +FORMAT Vertical; +``` + +Result: + +```response +Row 1: +────── +a: 0.0001 +toTypeName(a): Nullable(Decimal(18, 18)) +b: ᴺᵁᴸᴸ +toTypeName(b): Nullable(Decimal(18, 18)) +``` + +**See also** + +- [`toDecimal64`](#todecimal64). +- [`toDecimal64OrZero`](#todecimal64orzero). +- [`toDecimal64OrDefault`](#todecimal64ordefault). + +## toDecimal64OrDefault + +Like [`toDecimal64`](#todecimal64), this function converts an input value to a value of type [Decimal(18, S)](../data-types/decimal.md) but returns the default value in case of an error. + +**Syntax** + +```sql +toDecimal64OrDefault(expr, S[, default]) +``` + +**Arguments** + +- `expr` — A String representation of a number. [String](../data-types/string.md). +- `S` — Scale parameter between 0 and 18, specifying how many digits the fractional part of a number can have. [UInt8](../data-types/int-uint.md). +- `default` (optional) — The default value to return if parsing to type `Decimal64(S)` is unsuccessful. [Decimal64(S)](../data-types/decimal.md). + +Supported arguments: +- String representations of type (U)Int8/16/32/64/128/256. +- String representations of type Float32/64. + +Unsupported arguments: +- String representations of Float32/64 values `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toDecimal64OrDefault('0xc0fe', 1);`. + +:::note +An overflow can occur if the value of `expr` exceeds the bounds of `Decimal64`: `( -1 * 10^(18 - S), 1 * 10^(18 - S) )`. +Excessive digits in a fraction are discarded (not rounded). +Excessive digits in the integer part will lead to an error. +::: + +**Returned value** + +- Value of type `Decimal(18, S)` if successful, otherwise returns the default value if passed or `0` if not. [Decimal64(S)](../data-types/decimal.md). + +**Examples** + +Query: + +``` sql +SELECT + toDecimal64OrDefault(toString(0.0001), 18) AS a, + toTypeName(a), + toDecimal64OrDefault('Inf', 0, CAST('-1', 'Decimal64(0)')) AS b, + toTypeName(b) +FORMAT Vertical; +``` + +Result: + +```response +Row 1: +────── +a: 0.0001 +toTypeName(a): Decimal(18, 18) +b: -1 +toTypeName(b): Decimal(18, 0) +``` + +**See also** + +- [`toDecimal64`](#todecimal64). +- [`toDecimal64OrZero`](#todecimal64orzero). +- [`toDecimal64OrNull`](#todecimal64ornull). + +## toDecimal128 + +Converts an input value to a value of type [`Decimal(38, S)`](../data-types/decimal.md) with scale of `S`. Throws an exception in case of an error. + +**Syntax** + +```sql +toDecimal128(expr, S) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). +- `S` — Scale parameter between 0 and 38, specifying how many digits the fractional part of a number can have. [UInt8](../data-types/int-uint.md). + +Supported arguments: +- Values or string representations of type (U)Int8/16/32/64/128/256. +- Values or string representations of type Float32/64. + +Unsupported arguments: +- Values or string representations of Float32/64 values `NaN` and `Inf` (case-insensitive). +- String representations of binary and hexadecimal values, e.g. `SELECT toDecimal128('0xc0fe', 1);`. + +:::note +An overflow can occur if the value of `expr` exceeds the bounds of `Decimal128`: `( -1 * 10^(38 - S), 1 * 10^(38 - S) )`. +Excessive digits in a fraction are discarded (not rounded). +Excessive digits in the integer part will lead to an exception. +::: + +**Returned value** + +- Value of type `Decimal(38, S)`. [Decimal128(S)](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT + toDecimal128(99, 1) AS a, toTypeName(a) AS type_a, + toDecimal128(99.67, 2) AS b, toTypeName(b) AS type_b, + toDecimal128('99.67', 3) AS c, toTypeName(c) AS type_c +FORMAT Vertical; +``` + +Result: + +```response +Row 1: +────── +a: 99 +type_a: Decimal(38, 1) +b: 99.67 +type_b: Decimal(38, 2) +c: 99.67 +type_c: Decimal(38, 3) +``` + +**See also** + +- [`toDecimal128OrZero`](#todecimal128orzero). +- [`toDecimal128OrNull`](#todecimal128ornull). +- [`toDecimal128OrDefault`](#todecimal128ordefault). + +## toDecimal128OrZero + +Like [`toDecimal128`](#todecimal128), this function converts an input value to a value of type [Decimal(38, S)](../data-types/decimal.md) but returns `0` in case of an error. + +**Syntax** + +```sql +toDecimal128OrZero(expr, S) +``` + +**Arguments** + +- `expr` — A String representation of a number. [String](../data-types/string.md). +- `S` — Scale parameter between 0 and 38, specifying how many digits the fractional part of a number can have. [UInt8](../data-types/int-uint.md). + +Supported arguments: +- String representations of type (U)Int8/16/32/64/128/256. +- String representations of type Float32/64. + +Unsupported arguments: +- String representations of Float32/64 values `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toDecimal128OrZero('0xc0fe', 1);`. + +:::note +An overflow can occur if the value of `expr` exceeds the bounds of `Decimal128`: `( -1 * 10^(38 - S), 1 * 10^(38 - S) )`. +Excessive digits in a fraction are discarded (not rounded). +Excessive digits in the integer part will lead to an error. +::: + +**Returned value** + +- Value of type `Decimal(38, S)` if successful, otherwise `0` with `S` decimal places. [Decimal128(S)](../data-types/decimal.md). + +**Example** + +Query: + +``` sql +SELECT + toDecimal128OrZero(toString(0.0001), 38) AS a, + toTypeName(a), + toDecimal128OrZero(toString('Inf'), 38) as b, + toTypeName(b) +FORMAT Vertical; +``` + +Result: + +```response +Row 1: +────── +a: 0.0001 +toTypeName(a): Decimal(38, 38) +b: 0 +toTypeName(b): Decimal(38, 38) +``` + +**See also** + +- [`toDecimal128`](#todecimal128). +- [`toDecimal128OrNull`](#todecimal128ornull). +- [`toDecimal128OrDefault`](#todecimal128ordefault). + +## toDecimal128OrNull + +Like [`toDecimal128`](#todecimal128), this function converts an input value to a value of type [Nullable(Decimal(38, S))](../data-types/decimal.md) but returns `0` in case of an error. + +**Syntax** + +```sql +toDecimal128OrNull(expr, S) +``` + +**Arguments** + +- `expr` — A String representation of a number. [String](../data-types/string.md). +- `S` — Scale parameter between 0 and 38, specifying how many digits the fractional part of a number can have. [UInt8](../data-types/int-uint.md). + +Supported arguments: +- String representations of type (U)Int8/16/32/64/128/256. +- String representations of type Float32/64. + +Unsupported arguments: +- String representations of Float32/64 values `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toDecimal128OrNull('0xc0fe', 1);`. + +:::note +An overflow can occur if the value of `expr` exceeds the bounds of `Decimal128`: `( -1 * 10^(38 - S), 1 * 10^(38 - S) )`. +Excessive digits in a fraction are discarded (not rounded). +Excessive digits in the integer part will lead to an error. +::: + +**Returned value** + +- Value of type `Nullable(Decimal(38, S))` if successful, otherwise value `NULL` of the same type. [Decimal128(S)](../data-types/decimal.md). + +**Examples** + +Query: + +``` sql +SELECT + toDecimal128OrNull(toString(1/42), 38) AS a, + toTypeName(a), + toDecimal128OrNull(toString('Inf'), 38) as b, + toTypeName(b) +FORMAT Vertical; +``` + +Result: + +```response +Row 1: +────── +a: 0.023809523809523808 +toTypeName(a): Nullable(Decimal(38, 38)) +b: ᴺᵁᴸᴸ +toTypeName(b): Nullable(Decimal(38, 38)) +``` + +**See also** + +- [`toDecimal128`](#todecimal128). +- [`toDecimal128OrZero`](#todecimal128orzero). +- [`toDecimal128OrDefault`](#todecimal128ordefault). + +## toDecimal128OrDefault + +Like [`toDecimal128`](#todecimal128), this function converts an input value to a value of type [Decimal(38, S)](../data-types/decimal.md) but returns the default value in case of an error. + +**Syntax** + +```sql +toDecimal128OrDefault(expr, S[, default]) +``` + +**Arguments** + +- `expr` — A String representation of a number. [String](../data-types/string.md). +- `S` — Scale parameter between 0 and 38, specifying how many digits the fractional part of a number can have. [UInt8](../data-types/int-uint.md). +- `default` (optional) — The default value to return if parsing to type `Decimal128(S)` is unsuccessful. [Decimal128(S)](../data-types/decimal.md). + +Supported arguments: +- String representations of type (U)Int8/16/32/64/128/256. +- String representations of type Float32/64. + +Unsupported arguments: +- String representations of Float32/64 values `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toDecimal128OrDefault('0xc0fe', 1);`. + +:::note +An overflow can occur if the value of `expr` exceeds the bounds of `Decimal128`: `( -1 * 10^(38 - S), 1 * 10^(38 - S) )`. +Excessive digits in a fraction are discarded (not rounded). +Excessive digits in the integer part will lead to an error. +::: + +**Returned value** + +- Value of type `Decimal(38, S)` if successful, otherwise returns the default value if passed or `0` if not. [Decimal128(S)](../data-types/decimal.md). + +**Examples** + +Query: + +``` sql +SELECT + toDecimal128OrDefault(toString(1/42), 18) AS a, + toTypeName(a), + toDecimal128OrDefault('Inf', 0, CAST('-1', 'Decimal128(0)')) AS b, + toTypeName(b) +FORMAT Vertical; +``` + +Result: + +```response +Row 1: +────── +a: 0.023809523809523808 +toTypeName(a): Decimal(38, 18) +b: -1 +toTypeName(b): Decimal(38, 0) +``` + +**See also** + +- [`toDecimal128`](#todecimal128). +- [`toDecimal128OrZero`](#todecimal128orzero). +- [`toDecimal128OrNull`](#todecimal128ornull). + +## toDecimal256 + +Converts an input value to a value of type [`Decimal(76, S)`](../data-types/decimal.md) with scale of `S`. Throws an exception in case of an error. + +**Syntax** + +```sql +toDecimal256(expr, S) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). +- `S` — Scale parameter between 0 and 76, specifying how many digits the fractional part of a number can have. [UInt8](../data-types/int-uint.md). + +Supported arguments: +- Values or string representations of type (U)Int8/16/32/64/128/256. +- Values or string representations of type Float32/64. + +Unsupported arguments: +- Values or string representations of Float32/64 values `NaN` and `Inf` (case-insensitive). +- String representations of binary and hexadecimal values, e.g. `SELECT toDecimal256('0xc0fe', 1);`. + +:::note +An overflow can occur if the value of `expr` exceeds the bounds of `Decimal256`: `( -1 * 10^(76 - S), 1 * 10^(76 - S) )`. +Excessive digits in a fraction are discarded (not rounded). +Excessive digits in the integer part will lead to an exception. +::: + +**Returned value** + +- Value of type `Decimal(76, S)`. [Decimal256(S)](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT + toDecimal256(99, 1) AS a, toTypeName(a) AS type_a, + toDecimal256(99.67, 2) AS b, toTypeName(b) AS type_b, + toDecimal256('99.67', 3) AS c, toTypeName(c) AS type_c +FORMAT Vertical; +``` + +Result: + +```response +Row 1: +────── +a: 99 +type_a: Decimal(76, 1) +b: 99.67 +type_b: Decimal(76, 2) +c: 99.67 +type_c: Decimal(76, 3) +``` + +**See also** + +- [`toDecimal256OrZero`](#todecimal256orzero). +- [`toDecimal256OrNull`](#todecimal256ornull). +- [`toDecimal256OrDefault`](#todecimal256ordefault). + +## toDecimal256OrZero + +Like [`toDecimal256`](#todecimal256), this function converts an input value to a value of type [Decimal(76, S)](../data-types/decimal.md) but returns `0` in case of an error. + +**Syntax** + +```sql +toDecimal256OrZero(expr, S) +``` + +**Arguments** + +- `expr` — A String representation of a number. [String](../data-types/string.md). +- `S` — Scale parameter between 0 and 76, specifying how many digits the fractional part of a number can have. [UInt8](../data-types/int-uint.md). + +Supported arguments: +- String representations of type (U)Int8/16/32/64/128/256. +- String representations of type Float32/64. + +Unsupported arguments: +- String representations of Float32/64 values `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toDecimal256OrZero('0xc0fe', 1);`. + +:::note +An overflow can occur if the value of `expr` exceeds the bounds of `Decimal256`: `( -1 * 10^(76 - S), 1 * 10^(76 - S) )`. +Excessive digits in a fraction are discarded (not rounded). +Excessive digits in the integer part will lead to an error. +::: + +**Returned value** + +- Value of type `Decimal(76, S)` if successful, otherwise `0` with `S` decimal places. [Decimal256(S)](../data-types/decimal.md). + +**Example** + +Query: + +``` sql +SELECT + toDecimal256OrZero(toString(0.0001), 76) AS a, + toTypeName(a), + toDecimal256OrZero(toString('Inf'), 76) as b, + toTypeName(b) +FORMAT Vertical; +``` + +Result: + +```response +Row 1: +────── +a: 0.0001 +toTypeName(a): Decimal(76, 76) +b: 0 +toTypeName(b): Decimal(76, 76) +``` + +**See also** + +- [`toDecimal256`](#todecimal256). +- [`toDecimal256OrNull`](#todecimal256ornull). +- [`toDecimal256OrDefault`](#todecimal256ordefault). + +## toDecimal256OrNull + +Like [`toDecimal256`](#todecimal256), this function converts an input value to a value of type [Nullable(Decimal(76, S))](../data-types/decimal.md) but returns `0` in case of an error. + +**Syntax** + +```sql +toDecimal256OrNull(expr, S) +``` + +**Arguments** + +- `expr` — A String representation of a number. [String](../data-types/string.md). +- `S` — Scale parameter between 0 and 76, specifying how many digits the fractional part of a number can have. [UInt8](../data-types/int-uint.md). + +Supported arguments: +- String representations of type (U)Int8/16/32/64/128/256. +- String representations of type Float32/64. + +Unsupported arguments: +- String representations of Float32/64 values `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toDecimal256OrNull('0xc0fe', 1);`. + +:::note +An overflow can occur if the value of `expr` exceeds the bounds of `Decimal256`: `( -1 * 10^(76 - S), 1 * 10^(76 - S) )`. +Excessive digits in a fraction are discarded (not rounded). +Excessive digits in the integer part will lead to an error. +::: + +**Returned value** + +- Value of type `Nullable(Decimal(76, S))` if successful, otherwise value `NULL` of the same type. [Decimal256(S)](../data-types/decimal.md). + +**Examples** + +Query: + +``` sql +SELECT + toDecimal256OrNull(toString(1/42), 76) AS a, + toTypeName(a), + toDecimal256OrNull(toString('Inf'), 76) as b, + toTypeName(b) +FORMAT Vertical; +``` + +Result: + +```response +Row 1: +────── +a: 0.023809523809523808 +toTypeName(a): Nullable(Decimal(76, 76)) +b: ᴺᵁᴸᴸ +toTypeName(b): Nullable(Decimal(76, 76)) +``` + +**See also** + +- [`toDecimal256`](#todecimal256). +- [`toDecimal256OrZero`](#todecimal256orzero). +- [`toDecimal256OrDefault`](#todecimal256ordefault). + +## toDecimal256OrDefault + +Like [`toDecimal256`](#todecimal256), this function converts an input value to a value of type [Decimal(76, S)](../data-types/decimal.md) but returns the default value in case of an error. + +**Syntax** + +```sql +toDecimal256OrDefault(expr, S[, default]) +``` + +**Arguments** + +- `expr` — A String representation of a number. [String](../data-types/string.md). +- `S` — Scale parameter between 0 and 76, specifying how many digits the fractional part of a number can have. [UInt8](../data-types/int-uint.md). +- `default` (optional) — The default value to return if parsing to type `Decimal256(S)` is unsuccessful. [Decimal256(S)](../data-types/decimal.md). + +Supported arguments: +- String representations of type (U)Int8/16/32/64/128/256. +- String representations of type Float32/64. + +Unsupported arguments: +- String representations of Float32/64 values `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toDecimal256OrDefault('0xc0fe', 1);`. + +:::note +An overflow can occur if the value of `expr` exceeds the bounds of `Decimal256`: `( -1 * 10^(76 - S), 1 * 10^(76 - S) )`. +Excessive digits in a fraction are discarded (not rounded). +Excessive digits in the integer part will lead to an error. +::: + +**Returned value** + +- Value of type `Decimal(76, S)` if successful, otherwise returns the default value if passed or `0` if not. [Decimal256(S)](../data-types/decimal.md). + +**Examples** + +Query: + +``` sql +SELECT + toDecimal256OrDefault(toString(1/42), 76) AS a, + toTypeName(a), + toDecimal256OrDefault('Inf', 0, CAST('-1', 'Decimal256(0)')) AS b, + toTypeName(b) +FORMAT Vertical; +``` + +Result: + +```response +Row 1: +────── +a: 0.023809523809523808 +toTypeName(a): Decimal(76, 76) +b: -1 +toTypeName(b): Decimal(76, 0) +``` + +**See also** + +- [`toDecimal256`](#todecimal256). +- [`toDecimal256OrZero`](#todecimal256orzero). +- [`toDecimal256OrNull`](#todecimal256ornull). + ## toString Functions for converting between numbers, strings (but not fixed strings), dates, and dates with times. @@ -5278,30 +6103,23 @@ Result: └───────┴───────────────┴──────┴──────────────┴──────────────┴──────────────────────┘ ``` -## toInterval(Year\|Quarter\|Month\|Week\|Day\|Hour\|Minute\|Second) +## toIntervalYear -Converts a Number type argument to an [Interval](../data-types/special-data-types/interval.md) data type. +Returns an interval of `n` years of data type [IntervalYear](../data-types/special-data-types/interval.md). **Syntax** ``` sql -toIntervalSecond(number) -toIntervalMinute(number) -toIntervalHour(number) -toIntervalDay(number) -toIntervalWeek(number) -toIntervalMonth(number) -toIntervalQuarter(number) -toIntervalYear(number) +toIntervalYear(n) ``` **Arguments** -- `number` — Duration of interval. Positive integer number. +- `n` — Number of years. Integer numbers or string representations thereof, and float numbers. [(U)Int*](../data-types/int-uint.md)/[Float*](../data-types/float.md)/[String](../data-types/string.md). **Returned values** -- The value in `Interval` data type. +- Interval of `n` years. [IntervalYear](../data-types/special-data-types/interval.md). **Example** @@ -5309,20 +6127,387 @@ Query: ``` sql WITH - toDate('2019-01-01') AS date, - INTERVAL 1 WEEK AS interval_week, - toIntervalWeek(1) AS interval_to_week -SELECT - date + interval_week, - date + interval_to_week; + toDate('2024-06-15') AS date, + toIntervalYear(1) AS interval_to_year +SELECT date + interval_to_year AS result ``` Result: ```response -┌─plus(date, interval_week)─┬─plus(date, interval_to_week)─┐ -│ 2019-01-08 │ 2019-01-08 │ -└───────────────────────────┴──────────────────────────────┘ +┌─────result─┐ +│ 2025-06-15 │ +└────────────┘ +``` + +## toIntervalQuarter + +Returns an interval of `n` quarters of data type [IntervalQuarter](../data-types/special-data-types/interval.md). + +**Syntax** + +``` sql +toIntervalQuarter(n) +``` + +**Arguments** + +- `n` — Number of quarters. Integer numbers or string representations thereof, and float numbers. [(U)Int*](../data-types/int-uint.md)/[Float*](../data-types/float.md)/[String](../data-types/string.md). + +**Returned values** + +- Interval of `n` quarters. [IntervalQuarter](../data-types/special-data-types/interval.md). + +**Example** + +Query: + +``` sql +WITH + toDate('2024-06-15') AS date, + toIntervalQuarter(1) AS interval_to_quarter +SELECT date + interval_to_quarter AS result +``` + +Result: + +```response +┌─────result─┐ +│ 2024-09-15 │ +└────────────┘ +``` + +## toIntervalMonth + +Returns an interval of `n` months of data type [IntervalMonth](../data-types/special-data-types/interval.md). + +**Syntax** + +``` sql +toIntervalMonth(n) +``` + +**Arguments** + +- `n` — Number of months. Integer numbers or string representations thereof, and float numbers. [(U)Int*](../data-types/int-uint.md)/[Float*](../data-types/float.md)/[String](../data-types/string.md). + +**Returned values** + +- Interval of `n` months. [IntervalMonth](../data-types/special-data-types/interval.md). + +**Example** + +Query: + +``` sql +WITH + toDate('2024-06-15') AS date, + toIntervalMonth(1) AS interval_to_month +SELECT date + interval_to_month AS result +``` + +Result: + +```response +┌─────result─┐ +│ 2024-07-15 │ +└────────────┘ +``` + +## toIntervalWeek + +Returns an interval of `n` weeks of data type [IntervalWeek](../data-types/special-data-types/interval.md). + +**Syntax** + +``` sql +toIntervalWeek(n) +``` + +**Arguments** + +- `n` — Number of weeks. Integer numbers or string representations thereof, and float numbers. [(U)Int*](../data-types/int-uint.md)/[Float*](../data-types/float.md)/[String](../data-types/string.md). + +**Returned values** + +- Interval of `n` weeks. [IntervalWeek](../data-types/special-data-types/interval.md). + +**Example** + +Query: + +``` sql +WITH + toDate('2024-06-15') AS date, + toIntervalWeek(1) AS interval_to_week +SELECT date + interval_to_week AS result +``` + +Result: + +```response +┌─────result─┐ +│ 2024-06-22 │ +└────────────┘ +``` + +## toIntervalDay + +Returns an interval of `n` days of data type [IntervalDay](../data-types/special-data-types/interval.md). + +**Syntax** + +``` sql +toIntervalDay(n) +``` + +**Arguments** + +- `n` — Number of days. Integer numbers or string representations thereof, and float numbers. [(U)Int*](../data-types/int-uint.md)/[Float*](../data-types/float.md)/[String](../data-types/string.md). + +**Returned values** + +- Interval of `n` days. [IntervalDay](../data-types/special-data-types/interval.md). + +**Example** + +Query: + +``` sql +WITH + toDate('2024-06-15') AS date, + toIntervalDay(5) AS interval_to_days +SELECT date + interval_to_days AS result +``` + +Result: + +```response +┌─────result─┐ +│ 2024-06-20 │ +└────────────┘ +``` + +## toIntervalHour + +Returns an interval of `n` hours of data type [IntervalHour](../data-types/special-data-types/interval.md). + +**Syntax** + +``` sql +toIntervalHour(n) +``` + +**Arguments** + +- `n` — Number of hours. Integer numbers or string representations thereof, and float numbers. [(U)Int*](../data-types/int-uint.md)/[Float*](../data-types/float.md)/[String](../data-types/string.md). + +**Returned values** + +- Interval of `n` hours. [IntervalHour](../data-types/special-data-types/interval.md). + +**Example** + +Query: + +``` sql +WITH + toDate('2024-06-15') AS date, + toIntervalHour(12) AS interval_to_hours +SELECT date + interval_to_hours AS result +``` + +Result: + +```response +┌──────────────result─┐ +│ 2024-06-15 12:00:00 │ +└─────────────────────┘ +``` + +## toIntervalMinute + +Returns an interval of `n` minutes of data type [IntervalMinute](../data-types/special-data-types/interval.md). + +**Syntax** + +``` sql +toIntervalMinute(n) +``` + +**Arguments** + +- `n` — Number of minutes. Integer numbers or string representations thereof, and float numbers. [(U)Int*](../data-types/int-uint.md)/[Float*](../data-types/float.md)/[String](../data-types/string.md). + +**Returned values** + +- Interval of `n` minutes. [IntervalMinute](../data-types/special-data-types/interval.md). + +**Example** + +Query: + +``` sql +WITH + toDate('2024-06-15') AS date, + toIntervalMinute(12) AS interval_to_minutes +SELECT date + interval_to_minutes AS result +``` + +Result: + +```response +┌──────────────result─┐ +│ 2024-06-15 00:12:00 │ +└─────────────────────┘ +``` + +## toIntervalSecond + +Returns an interval of `n` seconds of data type [IntervalSecond](../data-types/special-data-types/interval.md). + +**Syntax** + +``` sql +toIntervalSecond(n) +``` + +**Arguments** + +- `n` — Number of seconds. Integer numbers or string representations thereof, and float numbers. [(U)Int*](../data-types/int-uint.md)/[Float*](../data-types/float.md)/[String](../data-types/string.md). + +**Returned values** + +- Interval of `n` seconds. [IntervalSecond](../data-types/special-data-types/interval.md). + +**Example** + +Query: + +``` sql +WITH + toDate('2024-06-15') AS date, + toIntervalSecond(30) AS interval_to_seconds +SELECT date + interval_to_seconds AS result +``` + +Result: + +```response +┌──────────────result─┐ +│ 2024-06-15 00:00:30 │ +└─────────────────────┘ +``` + +## toIntervalMillisecond + +Returns an interval of `n` milliseconds of data type [IntervalMillisecond](../data-types/special-data-types/interval.md). + +**Syntax** + +``` sql +toIntervalMillisecond(n) +``` + +**Arguments** + +- `n` — Number of milliseconds. Integer numbers or string representations thereof, and float numbers. [(U)Int*](../data-types/int-uint.md)/[Float*](../data-types/float.md)/[String](../data-types/string.md). + +**Returned values** + +- Interval of `n` milliseconds. [IntervalMilliseconds](../data-types/special-data-types/interval.md). + +**Example** + +Query: + +``` sql +WITH + toDateTime('2024-06-15') AS date, + toIntervalMillisecond(30) AS interval_to_milliseconds +SELECT date + interval_to_milliseconds AS result +``` + +Result: + +```response +┌──────────────────result─┐ +│ 2024-06-15 00:00:00.030 │ +└─────────────────────────┘ +``` + +## toIntervalMicrosecond + +Returns an interval of `n` microseconds of data type [IntervalMicrosecond](../data-types/special-data-types/interval.md). + +**Syntax** + +``` sql +toIntervalMicrosecond(n) +``` + +**Arguments** + +- `n` — Number of microseconds. Integer numbers or string representations thereof, and float numbers. [(U)Int*](../data-types/int-uint.md)/[Float*](../data-types/float.md)/[String](../data-types/string.md). + +**Returned values** + +- Interval of `n` microseconds. [IntervalMicrosecond](../data-types/special-data-types/interval.md). + +**Example** + +Query: + +``` sql +WITH + toDateTime('2024-06-15') AS date, + toIntervalMicrosecond(30) AS interval_to_microseconds +SELECT date + interval_to_microseconds AS result +``` + +Result: + +```response +┌─────────────────────result─┐ +│ 2024-06-15 00:00:00.000030 │ +└────────────────────────────┘ +``` + +## toIntervalNanosecond + +Returns an interval of `n` nanoseconds of data type [IntervalNanosecond](../data-types/special-data-types/interval.md). + +**Syntax** + +``` sql +toIntervalNanosecond(n) +``` + +**Arguments** + +- `n` — Number of nanoseconds. Integer numbers or string representations thereof, and float numbers. [(U)Int*](../data-types/int-uint.md)/[Float*](../data-types/float.md)/[String](../data-types/string.md). + +**Returned values** + +- Interval of `n` nanoseconds. [IntervalNanosecond](../data-types/special-data-types/interval.md). + +**Example** + +Query: + +``` sql +WITH + toDateTime('2024-06-15') AS date, + toIntervalNanosecond(30) AS interval_to_nanoseconds +SELECT date + interval_to_nanoseconds AS result +``` + +Result: + +```response +┌────────────────────────result─┐ +│ 2024-06-15 00:00:00.000000030 │ +└───────────────────────────────┘ ``` ## parseDateTime diff --git a/docs/en/sql-reference/statements/alter/partition.md b/docs/en/sql-reference/statements/alter/partition.md index 778816f8934..1bb7817364a 100644 --- a/docs/en/sql-reference/statements/alter/partition.md +++ b/docs/en/sql-reference/statements/alter/partition.md @@ -9,6 +9,7 @@ The following operations with [partitions](/docs/en/engines/table-engines/merget - [DETACH PARTITION\|PART](#detach-partitionpart) — Moves a partition or part to the `detached` directory and forget it. - [DROP PARTITION\|PART](#drop-partitionpart) — Deletes a partition or part. +- [DROP DETACHED PARTITION\|PART](#drop-detached-partitionpart) - Delete a part or all parts of a partition from `detached`. - [FORGET PARTITION](#forget-partition) — Deletes a partition metadata from zookeeper if it's empty. - [ATTACH PARTITION\|PART](#attach-partitionpart) — Adds a partition or part from the `detached` directory to the table. - [ATTACH PARTITION FROM](#attach-partition-from) — Copies the data partition from one table to another and adds. @@ -68,7 +69,7 @@ ALTER TABLE mt DROP PART 'all_4_4_0'; ## DROP DETACHED PARTITION\|PART ``` sql -ALTER TABLE table_name [ON CLUSTER cluster] DROP DETACHED PARTITION|PART partition_expr +ALTER TABLE table_name [ON CLUSTER cluster] DROP DETACHED PARTITION|PART ALL|partition_expr ``` Removes the specified part or all parts of the specified partition from `detached`. diff --git a/docs/en/sql-reference/statements/alter/statistics.md b/docs/en/sql-reference/statements/alter/statistics.md index 6880cef0e5c..7a1774a01b5 100644 --- a/docs/en/sql-reference/statements/alter/statistics.md +++ b/docs/en/sql-reference/statements/alter/statistics.md @@ -8,26 +8,28 @@ sidebar_label: STATISTICS The following operations are available: -- `ALTER TABLE [db].table ADD STATISTICS (columns list) TYPE (type list)` - Adds statistic description to tables metadata. +- `ALTER TABLE [db].table ADD STATISTICS [IF NOT EXISTS] (column list) TYPE (type list)` - Adds statistic description to tables metadata. -- `ALTER TABLE [db].table MODIFY STATISTICS (columns list) TYPE (type list)` - Modifies statistic description to tables metadata. +- `ALTER TABLE [db].table MODIFY STATISTICS (column list) TYPE (type list)` - Modifies statistic description to tables metadata. -- `ALTER TABLE [db].table DROP STATISTICS (columns list)` - Removes statistics from the metadata of the specified columns and deletes all statistics objects in all parts for the specified columns. +- `ALTER TABLE [db].table DROP STATISTICS [IF EXISTS] (column list)` - Removes statistics from the metadata of the specified columns and deletes all statistics objects in all parts for the specified columns. -- `ALTER TABLE [db].table CLEAR STATISTICS (columns list)` - Deletes all statistics objects in all parts for the specified columns. Statistics objects can be rebuild using `ALTER TABLE MATERIALIZE STATISTICS`. +- `ALTER TABLE [db].table CLEAR STATISTICS [IF EXISTS] (column list)` - Deletes all statistics objects in all parts for the specified columns. Statistics objects can be rebuild using `ALTER TABLE MATERIALIZE STATISTICS`. -- `ALTER TABLE [db.]table MATERIALIZE STATISTICS (columns list)` - Rebuilds the statistic for columns. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations). +- `ALTER TABLE [db.]table MATERIALIZE STATISTICS [IF EXISTS] (column list)` - Rebuilds the statistic for columns. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations). The first two commands are lightweight in a sense that they only change metadata or remove files. Also, they are replicated, syncing statistics metadata via ZooKeeper. -There is an example adding two statistics types to two columns: +## Example: + +Adding two statistics types to two columns: ``` ALTER TABLE t1 MODIFY STATISTICS c, d TYPE TDigest, Uniq; ``` :::note -Statistic manipulation is supported only for tables with [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants). +Statistic are supported only for [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine tables (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants). ::: diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index 2931f7020fb..45e7a41e8a2 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -13,8 +13,8 @@ Creates a new view. Views can be [normal](#normal-view), [materialized](#materia Syntax: ``` sql -CREATE [OR REPLACE] VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster_name] -[DEFINER = { user | CURRENT_USER }] [SQL SECURITY { DEFINER | INVOKER | NONE }] +CREATE [OR REPLACE] VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster_name] +[DEFINER = { user | CURRENT_USER }] [SQL SECURITY { DEFINER | INVOKER | NONE }] AS SELECT ... [COMMENT 'comment'] ``` @@ -55,8 +55,8 @@ SELECT * FROM view(column1=value1, column2=value2 ...) ## Materialized View ``` sql -CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]name] [ENGINE = engine] [POPULATE] -[DEFINER = { user | CURRENT_USER }] [SQL SECURITY { DEFINER | INVOKER | NONE }] +CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]name] [ENGINE = engine] [POPULATE] +[DEFINER = { user | CURRENT_USER }] [SQL SECURITY { DEFINER | INVOKER | NONE }] AS SELECT ... [COMMENT 'comment'] ``` @@ -92,7 +92,7 @@ Given that `POPULATE` works like `CREATE TABLE ... AS SELECT ...` it has limitat - It is not supported with Replicated database - It is not supported in ClickHouse cloud -Instead a separate `INSERT ... SELECT` can be used. +Instead a separate `INSERT ... SELECT` can be used. ::: A `SELECT` query can contain `DISTINCT`, `GROUP BY`, `ORDER BY`, `LIMIT`. Note that the corresponding conversions are performed independently on each block of inserted data. For example, if `GROUP BY` is set, data is aggregated during insertion, but only within a single packet of inserted data. The data won’t be further aggregated. The exception is when using an `ENGINE` that independently performs data aggregation, such as `SummingMergeTree`. @@ -110,7 +110,7 @@ To delete a view, use [DROP VIEW](../../../sql-reference/statements/drop.md#drop `DEFINER` and `SQL SECURITY` allow you to specify which ClickHouse user to use when executing the view's underlying query. `SQL SECURITY` has three legal values: `DEFINER`, `INVOKER`, or `NONE`. You can specify any existing user or `CURRENT_USER` in the `DEFINER` clause. -The following table will explain which rights are required for which user in order to select from view. +The following table will explain which rights are required for which user in order to select from view. Note that regardless of the SQL security option, in every case it is still required to have `GRANT SELECT ON ` in order to read from it. | SQL security option | View | Materialized View | @@ -130,7 +130,7 @@ If `DEFINER`/`SQL SECURITY` aren't specified, the default values are used: If a view is attached without `DEFINER`/`SQL SECURITY` specified, the default value is `SQL SECURITY NONE` for the materialized view and `SQL SECURITY INVOKER` for the normal view. -To change SQL security for an existing view, use +To change SQL security for an existing view, use ```sql ALTER TABLE MODIFY SQL SECURITY { DEFINER | INVOKER | NONE } [DEFINER = { user | CURRENT_USER }] ``` @@ -161,6 +161,8 @@ CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name REFRESH EVERY|AFTER interval [OFFSET interval] RANDOMIZE FOR interval DEPENDS ON [db.]name [, [db.]name [, ...]] +SETTINGS name = value [, name = value [, ...]] +[APPEND] [TO[db.]name] [(columns)] [ENGINE = engine] [EMPTY] AS SELECT ... [COMMENT 'comment'] @@ -170,18 +172,23 @@ where `interval` is a sequence of simple intervals: number SECOND|MINUTE|HOUR|DAY|WEEK|MONTH|YEAR ``` -Periodically runs the corresponding query and stores its result in a table, atomically replacing the table's previous contents. +Periodically runs the corresponding query and stores its result in a table. + * If the query says `APPEND`, each refresh inserts rows into the table without deleting existing rows. The insert is not atomic, just like a regular INSERT SELECT. + * Otherwise each refresh atomically replaces the table's previous contents. Differences from regular non-refreshable materialized views: - * No insert trigger. I.e. when new data is inserted into the table specified in SELECT, it's *not* automatically pushed to the refreshable materialized view. The periodic refresh runs the entire query and replaces the entire table. + * No insert trigger. I.e. when new data is inserted into the table specified in SELECT, it's *not* automatically pushed to the refreshable materialized view. The periodic refresh runs the entire query. * No restrictions on the SELECT query. Table functions (e.g. `url()`), views, UNION, JOIN, are all allowed. +:::note +The settings in the `REFRESH ... SETTINGS` part of the query are refresh settings (e.g. `refresh_retries`), distinct from regular settings (e.g. `max_threads`). Regular settings can be specified using `SETTINGS` at the end of the query. +::: + :::note Refreshable materialized views are a work in progress. Setting `allow_experimental_refreshable_materialized_view = 1` is required for creating one. Current limitations: * not compatible with Replicated database or table engines * It is not supported in ClickHouse Cloud * require [Atomic database engine](../../../engines/database-engines/atomic.md), - * no retries for failed refresh - we just skip to the next scheduled refresh time, * no limit on number of concurrent refreshes. ::: @@ -246,15 +253,22 @@ A few more examples: `DEPENDS ON` only works between refreshable materialized views. Listing a regular table in the `DEPENDS ON` list will prevent the view from ever refreshing (dependencies can be removed with `ALTER`, see below). ::: +### Settings + +Available refresh settings: + * `refresh_retries` - How many times to retry if refresh query fails with an exception. If all retries fail, skip to the next scheduled refresh time. 0 means no retries, -1 means infinite retries. Default: 0. + * `refresh_retry_initial_backoff_ms` - Delay before the first retry, if `refresh_retries` is not zero. Each subsequent retry doubles the delay, up to `refresh_retry_max_backoff_ms`. Default: 100 ms. + * `refresh_retry_max_backoff_ms` - Limit on the exponential growth of delay between refresh attempts. Default: 60000 ms (1 minute). + ### Changing Refresh Parameters {#changing-refresh-parameters} To change refresh parameters: ``` -ALTER TABLE [db.]name MODIFY REFRESH EVERY|AFTER ... [RANDOMIZE FOR ...] [DEPENDS ON ...] +ALTER TABLE [db.]name MODIFY REFRESH EVERY|AFTER ... [RANDOMIZE FOR ...] [DEPENDS ON ...] [SETTINGS ...] ``` :::note -This replaces refresh schedule *and* dependencies. If the table had a `DEPENDS ON`, doing a `MODIFY REFRESH` without `DEPENDS ON` will remove the dependencies. +This replaces *all* refresh parameters at once: schedule, dependencies, settings, and APPEND-ness. E.g. if the table had a `DEPENDS ON`, doing a `MODIFY REFRESH` without `DEPENDS ON` will remove the dependencies. ::: ### Other operations @@ -263,6 +277,10 @@ The status of all refreshable materialized views is available in table [`system. To manually stop, start, trigger, or cancel refreshes use [`SYSTEM STOP|START|REFRESH|CANCEL VIEW`](../system.md#refreshable-materialized-views). +:::note +Fun fact: the refresh query is allowed to read from the view that's being refreshed, seeing pre-refresh version of the data. This means you can implement Conway's game of life: https://pastila.nl/?00021a4b/d6156ff819c83d490ad2dcec05676865#O0LGWTO7maUQIA4AcGUtlA== +::: + ## Window View [Experimental] :::info diff --git a/docs/en/sql-reference/statements/delete.md b/docs/en/sql-reference/statements/delete.md index 88a9c933519..78142f880fe 100644 --- a/docs/en/sql-reference/statements/delete.md +++ b/docs/en/sql-reference/statements/delete.md @@ -38,8 +38,7 @@ If you anticipate frequent deletes, consider using a [custom partitioning key](/ ### Lightweight `DELETE`s with projections -By default, `DELETE` does not work for tables with projections. This is because rows in a projection may be affected by a `DELETE` operation and may require the projection to be rebuilt, negatively affecting `DELETE` performance. -However, there is an option to change this behavior. By changing setting `lightweight_mutation_projection_mode = 'drop'`, deletes will work with projections. +By default, `DELETE` does not work for tables with projections. This is because rows in a projection may be affected by a `DELETE` operation. But there is a [MergeTree setting](https://clickhouse.com/docs/en/operations/settings/merge-tree-settings) `lightweight_mutation_projection_mode` can change the behavior. ## Performance considerations when using lightweight `DELETE` diff --git a/docs/en/sql-reference/statements/grant.md b/docs/en/sql-reference/statements/grant.md index 43fa344a16d..6118f4c1d36 100644 --- a/docs/en/sql-reference/statements/grant.md +++ b/docs/en/sql-reference/statements/grant.md @@ -200,6 +200,7 @@ Hierarchy of privileges: - `JDBC` - `HDFS` - `S3` + - `POSTGRES` - [dictGet](#dictget) - [displaySecretsInShowAndSelect](#displaysecretsinshowandselect) - [NAMED COLLECTION ADMIN](#named-collection-admin) @@ -476,6 +477,7 @@ Allows using external data sources. Applies to [table engines](../../engines/tab - `JDBC`. Level: `GLOBAL` - `HDFS`. Level: `GLOBAL` - `S3`. Level: `GLOBAL` + - `POSTGRES`. Level: `GLOBAL` The `SOURCES` privilege enables use of all the sources. Also you can grant a privilege for each source individually. To use sources, you need additional privileges. diff --git a/docs/en/sql-reference/statements/select/join.md b/docs/en/sql-reference/statements/select/join.md index 96d9d26977d..b228f7025c4 100644 --- a/docs/en/sql-reference/statements/select/join.md +++ b/docs/en/sql-reference/statements/select/join.md @@ -186,7 +186,7 @@ Otherwise, you'll get `INVALID_JOIN_ON_EXPRESSION`. ::: -Clickhouse currently supports `ALL INNER/LEFT/RIGHT/FULL JOIN` with inequality conditions in addition to equality conditions. The inequality conditions are supported only for `hash` and `grace_hash` join algorithms. The inequality conditions are not supported with `join_use_nulls`. +Clickhouse currently supports `ALL/ANY/SEMI/ANTI INNER/LEFT/RIGHT/FULL JOIN` with inequality conditions in addition to equality conditions. The inequality conditions are supported only for `hash` and `grace_hash` join algorithms. The inequality conditions are not supported with `join_use_nulls`. **Example** diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md index 35f2f15dd80..77d023b67ce 100644 --- a/docs/en/sql-reference/statements/system.md +++ b/docs/en/sql-reference/statements/system.md @@ -136,7 +136,13 @@ The compiled expression cache is enabled/disabled with the query/user/profile-le ## DROP QUERY CACHE +```sql +SYSTEM DROP QUERY CACHE; +SYSTEM DROP QUERY CACHE TAG '' +```` + Clears the [query cache](../../operations/query-cache.md). +If a tag is specified, only query cache entries with the specified tag are deleted. ## DROP FORMAT SCHEMA CACHE {#system-drop-schema-format} @@ -400,7 +406,7 @@ SYSTEM SYNC REPLICA [ON CLUSTER cluster_name] [db.]replicated_merge_tree_family_ After running this statement the `[db.]replicated_merge_tree_family_table_name` fetches commands from the common replicated log into its own replication queue, and then the query waits till the replica processes all of the fetched commands. The following modifiers are supported: - If a `STRICT` modifier was specified then the query waits for the replication queue to become empty. The `STRICT` version may never succeed if new entries constantly appear in the replication queue. - - If a `LIGHTWEIGHT` modifier was specified then the query waits only for `GET_PART`, `ATTACH_PART`, `DROP_RANGE`, `REPLACE_RANGE` and `DROP_PART` entries to be processed. + - If a `LIGHTWEIGHT` modifier was specified then the query waits only for `GET_PART`, `ATTACH_PART`, `DROP_RANGE`, `REPLACE_RANGE` and `DROP_PART` entries to be processed. Additionally, the LIGHTWEIGHT modifier supports an optional FROM 'srcReplicas' clause, where 'srcReplicas' is a comma-separated list of source replica names. This extension allows for more targeted synchronization by focusing only on replication tasks originating from the specified source replicas. - If a `PULL` modifier was specified then the query pulls new replication queue entries from ZooKeeper, but does not wait for anything to be processed. @@ -526,6 +532,10 @@ Trigger an immediate out-of-schedule refresh of a given view. SYSTEM REFRESH VIEW [db.]name ``` +### REFRESH VIEW + +Wait for the currently running refresh to complete. If the refresh fails, throws an exception. If no refresh is running, completes immediately, throwing an exception if previous refresh failed. + ### STOP VIEW, STOP VIEWS Disable periodic refreshing of the given view or all refreshable views. If a refresh is in progress, cancel it too. diff --git a/docs/en/sql-reference/table-functions/azureBlobStorage.md b/docs/en/sql-reference/table-functions/azureBlobStorage.md index f59fedeb3a2..6936c807f96 100644 --- a/docs/en/sql-reference/table-functions/azureBlobStorage.md +++ b/docs/en/sql-reference/table-functions/azureBlobStorage.md @@ -77,3 +77,16 @@ SELECT count(*) FROM azureBlobStorage('DefaultEndpointsProtocol=https;AccountNam **See Also** - [AzureBlobStorage Table Engine](/docs/en/engines/table-engines/integrations/azureBlobStorage.md) + +## Hive-style partitioning {#hive-style-partitioning} + +When setting `use_hive_partitioning` is set to 1, ClickHouse will detect Hive-style partitioning in the path (`/name=value/`) and will allow to use partition columns as virtual columns in the query. These virtual columns will have the same names as in the partitioned path, but starting with `_`. + +**Example** + +Use virtual column, created with Hive-style partitioning + +``` sql +SET use_hive_partitioning = 1; +SELECT * from azureBlobStorage(config, storage_account_url='...', container='...', blob_path='http://data/path/date=*/country=*/code=*/*.parquet') where _date > '2020-01-01' and _country = 'Netherlands' and _code = 42; +``` diff --git a/docs/en/sql-reference/table-functions/file.md b/docs/en/sql-reference/table-functions/file.md index 44b1b50620a..3243e6cf569 100644 --- a/docs/en/sql-reference/table-functions/file.md +++ b/docs/en/sql-reference/table-functions/file.md @@ -103,7 +103,7 @@ LIMIT 2; └─────────┴─────────┴─────────┘ ``` -### Inserting data from a file into a table: +### Inserting data from a file into a table ``` sql INSERT INTO FUNCTION @@ -206,6 +206,19 @@ SELECT count(*) FROM file('big_dir/**/file002', 'CSV', 'name String, value UInt3 - `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`. - `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`. +## Hive-style partitioning {#hive-style-partitioning} + +When setting `use_hive_partitioning` is set to 1, ClickHouse will detect Hive-style partitioning in the path (`/name=value/`) and will allow to use partition columns as virtual columns in the query. These virtual columns will have the same names as in the partitioned path, but starting with `_`. + +**Example** + +Use virtual column, created with Hive-style partitioning + +``` sql +SET use_hive_partitioning = 1; +SELECT * from file('data/path/date=*/country=*/code=*/*.parquet') where _date > '2020-01-01' and _country = 'Netherlands' and _code = 42; +``` + ## Settings {#settings} - [engine_file_empty_if_not_exists](/docs/en/operations/settings/settings.md#engine-file-empty_if-not-exists) - allows to select empty data from a file that doesn't exist. Disabled by default. diff --git a/docs/en/sql-reference/table-functions/fuzzQuery.md b/docs/en/sql-reference/table-functions/fuzzQuery.md new file mode 100644 index 00000000000..e15f8a40156 --- /dev/null +++ b/docs/en/sql-reference/table-functions/fuzzQuery.md @@ -0,0 +1,36 @@ +--- +slug: /en/sql-reference/table-functions/fuzzQuery +sidebar_position: 75 +sidebar_label: fuzzQuery +--- + +# fuzzQuery + +Perturbs the given query string with random variations. + +``` sql +fuzzQuery(query[, max_query_length[, random_seed]]) +``` + +**Arguments** + +- `query` (String) - The source query to perform the fuzzing on. +- `max_query_length` (UInt64) - A maximum length the query can get during the fuzzing process. +- `random_seed` (UInt64) - A random seed for producing stable results. + +**Returned Value** + +A table object with a single column containing perturbed query strings. + +## Usage Example + +``` sql +SELECT * FROM fuzzQuery('SELECT materialize(\'a\' AS key) GROUP BY key') LIMIT 2; +``` + +``` + ┌─query──────────────────────────────────────────────────────────┐ +1. │ SELECT 'a' AS key GROUP BY key │ +2. │ EXPLAIN PIPELINE compact = true SELECT 'a' AS key GROUP BY key │ + └────────────────────────────────────────────────────────────────┘ +``` diff --git a/docs/en/sql-reference/table-functions/hdfs.md b/docs/en/sql-reference/table-functions/hdfs.md index 405ac477846..30d2e371c7e 100644 --- a/docs/en/sql-reference/table-functions/hdfs.md +++ b/docs/en/sql-reference/table-functions/hdfs.md @@ -100,10 +100,23 @@ FROM hdfs('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name Strin - `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`. - `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`. +## Hive-style partitioning {#hive-style-partitioning} + +When setting `use_hive_partitioning` is set to 1, ClickHouse will detect Hive-style partitioning in the path (`/name=value/`) and will allow to use partition columns as virtual columns in the query. These virtual columns will have the same names as in the partitioned path, but starting with `_`. + +**Example** + +Use virtual column, created with Hive-style partitioning + +``` sql +SET use_hive_partitioning = 1; +SELECT * from HDFS('hdfs://hdfs1:9000/data/path/date=*/country=*/code=*/*.parquet') where _date > '2020-01-01' and _country = 'Netherlands' and _code = 42; +``` + ## Storage Settings {#storage-settings} - [hdfs_truncate_on_insert](/docs/en/operations/settings/settings.md#hdfs_truncate_on_insert) - allows to truncate file before insert into it. Disabled by default. -- [hdfs_create_multiple_files](/docs/en/operations/settings/settings.md#hdfs_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default. +- [hdfs_create_new_file_on_insert](/docs/en/operations/settings/settings.md#hdfs_create_new_file_on_insert) - allows to create a new file on each insert if format has suffix. Disabled by default. - [hdfs_skip_empty_files](/docs/en/operations/settings/settings.md#hdfs_skip_empty_files) - allows to skip empty files while reading. Disabled by default. - [ignore_access_denied_multidirectory_globs](/docs/en/operations/settings/settings.md#ignore_access_denied_multidirectory_globs) - allows to ignore permission denied errors for multi-directory globs. diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index 35e5d86034c..181c92b92d4 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -274,10 +274,23 @@ FROM s3( - `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`. In case of archive shows uncompressed file size of the file inside the archive. - `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`. +## Hive-style partitioning {#hive-style-partitioning} + +When setting `use_hive_partitioning` is set to 1, ClickHouse will detect Hive-style partitioning in the path (`/name=value/`) and will allow to use partition columns as virtual columns in the query. These virtual columns will have the same names as in the partitioned path, but starting with `_`. + +**Example** + +Use virtual column, created with Hive-style partitioning + +``` sql +SET use_hive_partitioning = 1; +SELECT * from s3('s3://data/path/date=*/country=*/code=*/*.parquet') where _date > '2020-01-01' and _country = 'Netherlands' and _code = 42; +``` + ## Storage Settings {#storage-settings} - [s3_truncate_on_insert](/docs/en/operations/settings/settings.md#s3_truncate_on_insert) - allows to truncate file before insert into it. Disabled by default. -- [s3_create_multiple_files](/docs/en/operations/settings/settings.md#s3_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default. +- [s3_create_new_file_on_insert](/docs/en/operations/settings/settings.md#s3_create_new_file_on_insert) - allows to create a new file on each insert if format has suffix. Disabled by default. - [s3_skip_empty_files](/docs/en/operations/settings/settings.md#s3_skip_empty_files) - allows to skip empty files while reading. Disabled by default. **See Also** diff --git a/docs/en/sql-reference/table-functions/timeSeriesData.md b/docs/en/sql-reference/table-functions/timeSeriesData.md new file mode 100644 index 00000000000..aa7a9d30c2a --- /dev/null +++ b/docs/en/sql-reference/table-functions/timeSeriesData.md @@ -0,0 +1,28 @@ +--- +slug: /en/sql-reference/table-functions/timeSeriesData +sidebar_position: 145 +sidebar_label: timeSeriesData +--- + +# timeSeriesData + +`timeSeriesData(db_name.time_series_table)` - Returns the [data](../../engines/table-engines/integrations/time-series.md#data-table) table +used by table `db_name.time_series_table` which table engine is [TimeSeries](../../engines/table-engines/integrations/time-series.md): + +``` sql +CREATE TABLE db_name.time_series_table ENGINE=TimeSeries DATA data_table +``` + +The function also works if the _data_ table is inner: + +``` sql +CREATE TABLE db_name.time_series_table ENGINE=TimeSeries DATA INNER UUID '01234567-89ab-cdef-0123-456789abcdef' +``` + +The following queries are equivalent: + +``` sql +SELECT * FROM timeSeriesData(db_name.time_series_table); +SELECT * FROM timeSeriesData('db_name.time_series_table'); +SELECT * FROM timeSeriesData('db_name', 'time_series_table'); +``` diff --git a/docs/en/sql-reference/table-functions/timeSeriesMetrics.md b/docs/en/sql-reference/table-functions/timeSeriesMetrics.md new file mode 100644 index 00000000000..913f1185bca --- /dev/null +++ b/docs/en/sql-reference/table-functions/timeSeriesMetrics.md @@ -0,0 +1,28 @@ +--- +slug: /en/sql-reference/table-functions/timeSeriesMetrics +sidebar_position: 145 +sidebar_label: timeSeriesMetrics +--- + +# timeSeriesMetrics + +`timeSeriesMetrics(db_name.time_series_table)` - Returns the [metrics](../../engines/table-engines/integrations/time-series.md#metrics-table) table +used by table `db_name.time_series_table` which table engine is [TimeSeries](../../engines/table-engines/integrations/time-series.md): + +``` sql +CREATE TABLE db_name.time_series_table ENGINE=TimeSeries METRICS metrics_table +``` + +The function also works if the _metrics_ table is inner: + +``` sql +CREATE TABLE db_name.time_series_table ENGINE=TimeSeries METRICS INNER UUID '01234567-89ab-cdef-0123-456789abcdef' +``` + +The following queries are equivalent: + +``` sql +SELECT * FROM timeSeriesMetrics(db_name.time_series_table); +SELECT * FROM timeSeriesMetrics('db_name.time_series_table'); +SELECT * FROM timeSeriesMetrics('db_name', 'time_series_table'); +``` diff --git a/docs/en/sql-reference/table-functions/timeSeriesTags.md b/docs/en/sql-reference/table-functions/timeSeriesTags.md new file mode 100644 index 00000000000..663a7dc6ac8 --- /dev/null +++ b/docs/en/sql-reference/table-functions/timeSeriesTags.md @@ -0,0 +1,28 @@ +--- +slug: /en/sql-reference/table-functions/timeSeriesTags +sidebar_position: 145 +sidebar_label: timeSeriesTags +--- + +# timeSeriesTags + +`timeSeriesTags(db_name.time_series_table)` - Returns the [tags](../../engines/table-engines/integrations/time-series.md#tags-table) table +used by table `db_name.time_series_table` which table engine is [TimeSeries](../../engines/table-engines/integrations/time-series.md): + +``` sql +CREATE TABLE db_name.time_series_table ENGINE=TimeSeries TAGS tags_table +``` + +The function also works if the _tags_ table is inner: + +``` sql +CREATE TABLE db_name.time_series_table ENGINE=TimeSeries TAGS INNER UUID '01234567-89ab-cdef-0123-456789abcdef' +``` + +The following queries are equivalent: + +``` sql +SELECT * FROM timeSeriesTags(db_name.time_series_table); +SELECT * FROM timeSeriesTags('db_name.time_series_table'); +SELECT * FROM timeSeriesTags('db_name', 'time_series_table'); +``` diff --git a/docs/en/sql-reference/table-functions/url.md b/docs/en/sql-reference/table-functions/url.md index 3bb7aff53a7..b4027594e7c 100644 --- a/docs/en/sql-reference/table-functions/url.md +++ b/docs/en/sql-reference/table-functions/url.md @@ -55,6 +55,19 @@ Character `|` inside patterns is used to specify failover addresses. They are it - `_size` — Size of the resource in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`. - `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`. +## Hive-style partitioning {#hive-style-partitioning} + +When setting `use_hive_partitioning` is set to 1, ClickHouse will detect Hive-style partitioning in the path (`/name=value/`) and will allow to use partition columns as virtual columns in the query. These virtual columns will have the same names as in the partitioned path, but starting with `_`. + +**Example** + +Use virtual column, created with Hive-style partitioning + +``` sql +SET use_hive_partitioning = 1; +SELECT * from url('http://data/path/date=*/country=*/code=*/*.parquet') where _date > '2020-01-01' and _country = 'Netherlands' and _code = 42; +``` + ## Storage Settings {#storage-settings} - [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default. diff --git a/docs/ru/getting-started/install.md b/docs/ru/getting-started/install.md index aee445da843..f8a660fbec9 100644 --- a/docs/ru/getting-started/install.md +++ b/docs/ru/getting-started/install.md @@ -22,18 +22,26 @@ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not su ### Из deb-пакетов {#install-from-deb-packages} -Яндекс рекомендует использовать официальные скомпилированные `deb`-пакеты для Debian или Ubuntu. Для установки пакетов выполните: +Рекомендуется использовать официальные скомпилированные `deb`-пакеты для Debian или Ubuntu. Для установки пакетов выполните: ``` bash -sudo apt-get install -y apt-transport-https ca-certificates dirmngr -sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754 +sudo apt-get install -y apt-transport-https ca-certificates curl gnupg +curl -fsSL 'https://packages.clickhouse.com/rpm/lts/repodata/repomd.xml.key' | sudo gpg --dearmor -o /usr/share/keyrings/clickhouse-keyring.gpg -echo "deb https://packages.clickhouse.com/deb stable main" | sudo tee \ +echo "deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb stable main" | sudo tee \ /etc/apt/sources.list.d/clickhouse.list sudo apt-get update +``` +#### Установка ClickHouse server и client + +```bash sudo apt-get install -y clickhouse-server clickhouse-client +``` +#### Запуск ClickHouse server + +```bash sudo service clickhouse-server start clickhouse-client # or "clickhouse-client --password" if you've set up a password. ``` @@ -55,7 +63,7 @@ clickhouse-client # or "clickhouse-client --password" if you've set up a passwor ::: ### Из rpm-пакетов {#from-rpm-packages} -Команда ClickHouse в Яндексе рекомендует использовать официальные предкомпилированные `rpm`-пакеты для CentOS, RedHat и всех остальных дистрибутивов Linux, основанных на rpm. +Команда ClickHouse рекомендует использовать официальные предкомпилированные `rpm`-пакеты для CentOS, RedHat и всех остальных дистрибутивов Linux, основанных на rpm. #### Установка официального репозитория @@ -102,7 +110,7 @@ sudo yum install clickhouse-server clickhouse-client ### Из tgz-архивов {#from-tgz-archives} -Команда ClickHouse в Яндексе рекомендует использовать предкомпилированные бинарники из `tgz`-архивов для всех дистрибутивов, где невозможна установка `deb`- и `rpm`- пакетов. +Команда ClickHouse рекомендует использовать предкомпилированные бинарники из `tgz`-архивов для всех дистрибутивов, где невозможна установка `deb`- и `rpm`- пакетов. Интересующую версию архивов можно скачать вручную с помощью `curl` или `wget` из репозитория https://packages.clickhouse.com/tgz/. После этого архивы нужно распаковать и воспользоваться скриптами установки. Пример установки самой свежей версии: diff --git a/docs/ru/operations/named-collections.md b/docs/ru/operations/named-collections.md index 48ee7c9f15d..67656f24ba3 100644 --- a/docs/ru/operations/named-collections.md +++ b/docs/ru/operations/named-collections.md @@ -146,7 +146,30 @@ SELECT dictGet('dict', 'B', 2); ## Пример использования именованных соединений с базой данных PostgreSQL -Описание параметров смотрите [postgresql](../sql-reference/table-functions/postgresql.md). +Описание параметров смотрите [postgresql](../sql-reference/table-functions/postgresql.md). Дополнительно есть алиасы: +- `username` для `user` +- `db` для `database`. + +Параметр `addresses_expr` используется в коллекции вместо `host:port`. Параметр опционален, потому что есть так же другие: `host`, `hostname`, `port`. Следующий псевдокод показывает приоритет: + +```sql +CASE + WHEN collection['addresses_expr'] != '' THEN collection['addresses_expr'] + WHEN collection['host'] != '' THEN collection['host'] || ':' || if(collection['port'] != '', collection['port'], '5432') + WHEN collection['hostname'] != '' THEN collection['hostname'] || ':' || if(collection['port'] != '', collection['port'], '5432') +END +``` + +Пример создания: +```sql +CREATE NAMED COLLECTION mypg AS +user = 'pguser', +password = 'jw8s0F4', +host = '127.0.0.1', +port = 5432, +database = 'test', +schema = 'test_schema' +``` Пример конфигурации: ```xml @@ -199,6 +222,10 @@ SELECT * FROM mypgtable; └───┘ ``` +:::note +PostgreSQL копирует данные из named collection при создании таблицы. Изменения в коллекции не влияют на существующие таблицы. +::: + ### Пример использования именованных соединений базой данных с движком PostgreSQL ```sql diff --git a/docs/ru/sql-reference/data-types/special-data-types/interval.md b/docs/ru/sql-reference/data-types/special-data-types/interval.md index 867a6665f4b..5064391f582 100644 --- a/docs/ru/sql-reference/data-types/special-data-types/interval.md +++ b/docs/ru/sql-reference/data-types/special-data-types/interval.md @@ -54,29 +54,28 @@ SELECT now() as current_date_time, current_date_time + INTERVAL 4 DAY └─────────────────────┴───────────────────────────────┘ ``` -Нельзя объединять интервалы различных типов. Нельзя использовать интервалы вида `4 DAY 1 HOUR`. Вместо этого выражайте интервал в единицах меньших или равных минимальной единице интервала, например, интервал «1 день и 1 час» можно выразить как `25 HOUR` или `90000 SECOND`. - -Арифметические операции со значениями типов `Interval` не доступны, однако можно последовательно добавлять различные интервалы к значениям типов `Date` и `DateTime`. Например: +Также можно использовать различные типы интервалов одновременно: ``` sql -SELECT now() AS current_date_time, current_date_time + INTERVAL 4 DAY + INTERVAL 3 HOUR +SELECT now() AS current_date_time, current_date_time + (INTERVAL 4 DAY + INTERVAL 3 HOUR) ``` ``` text -┌───current_date_time─┬─plus(plus(now(), toIntervalDay(4)), toIntervalHour(3))─┐ -│ 2019-10-23 11:16:28 │ 2019-10-27 14:16:28 │ -└─────────────────────┴────────────────────────────────────────────────────────┘ +┌───current_date_time─┬─plus(current_date_time, plus(toIntervalDay(4), toIntervalHour(3)))─┐ +│ 2024-08-08 18:31:39 │ 2024-08-12 21:31:39 │ +└─────────────────────┴────────────────────────────────────────────────────────────────────┘ ``` -Следующий запрос приведёт к генерированию исключения: +И сравнивать значения из разными интервалами: ``` sql -select now() AS current_date_time, current_date_time + (INTERVAL 4 DAY + INTERVAL 3 HOUR) +SELECT toIntervalMicrosecond(3600000000) = toIntervalHour(1); ``` ``` text -Received exception from server (version 19.14.1): -Code: 43. DB::Exception: Received from localhost:9000. DB::Exception: Wrong argument types for function plus: if one argument is Interval, then another must be Date or DateTime.. +┌─less(toIntervalMicrosecond(179999999), toIntervalMinute(3))─┐ +│ 1 │ +└─────────────────────────────────────────────────────────────┘ ``` ## Смотрите также {#smotrite-takzhe} diff --git a/docs/zh/operations/external-authenticators/kerberos.md b/docs/zh/operations/external-authenticators/kerberos.md index 649a0b9bd48..d1a39bbc952 100644 --- a/docs/zh/operations/external-authenticators/kerberos.md +++ b/docs/zh/operations/external-authenticators/kerberos.md @@ -23,30 +23,30 @@ slug: /zh/operations/external-authenticators/kerberos 示例 (进入 `config.xml`): ```xml - + - + ``` 主体规范: ```xml - + HTTP/clickhouse.example.com@EXAMPLE.COM - + ``` 按领域过滤: ```xml - + EXAMPLE.COM - + ``` !!! warning "注意" @@ -74,7 +74,7 @@ Kerberos主体名称格式通常遵循以下模式: 示例 (进入 `users.xml`): ``` - + @@ -85,7 +85,7 @@ Kerberos主体名称格式通常遵循以下模式: - + ``` !!! warning "警告" diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index ce3a4659e0e..3add371b30f 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -1,4 +1,4 @@ -add_compile_options($<$,$>:${COVERAGE_FLAGS}>) +add_compile_options("$<$,$>:${COVERAGE_FLAGS}>") if (USE_CLANG_TIDY) set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}") diff --git a/programs/bash-completion/completions/CMakeLists.txt b/programs/bash-completion/completions/CMakeLists.txt index d364e07ef6e..2e911e81981 100644 --- a/programs/bash-completion/completions/CMakeLists.txt +++ b/programs/bash-completion/completions/CMakeLists.txt @@ -6,6 +6,7 @@ macro(configure_bash_completion) COMMAND ${PKG_CONFIG_BIN} --variable=completionsdir bash-completion OUTPUT_VARIABLE ${out} OUTPUT_STRIP_TRAILING_WHITESPACE + COMMAND_ERROR_IS_FATAL ANY ) endif() string(REPLACE /usr "${CMAKE_INSTALL_PREFIX}" out "${out}") diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp index 48dca82eb2b..36f774a3c12 100644 --- a/programs/benchmark/Benchmark.cpp +++ b/programs/benchmark/Benchmark.cpp @@ -75,6 +75,8 @@ public: const String & default_database_, const String & user_, const String & password_, + const String & proto_send_chunked_, + const String & proto_recv_chunked_, const String & quota_key_, const String & stage, bool randomize_, @@ -128,7 +130,9 @@ public: connections.emplace_back(std::make_unique( concurrency, cur_host, cur_port, - default_database_, user_, password_, quota_key_, + default_database_, user_, password_, + proto_send_chunked_, proto_recv_chunked_, + quota_key_, /* cluster_= */ "", /* cluster_secret_= */ "", /* client_name_= */ std::string(DEFAULT_CLIENT_NAME), @@ -662,6 +666,50 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv) Strings hosts = options.count("host") ? options["host"].as() : Strings({"localhost"}); + String proto_send_chunked {"notchunked"}; + String proto_recv_chunked {"notchunked"}; + + if (options.count("proto_caps")) + { + std::string proto_caps_str = options["proto_caps"].as(); + + std::vector proto_caps; + splitInto<','>(proto_caps, proto_caps_str); + + for (auto cap_str : proto_caps) + { + std::string direction; + + if (cap_str.starts_with("send_")) + { + direction = "send"; + cap_str = cap_str.substr(std::string_view("send_").size()); + } + else if (cap_str.starts_with("recv_")) + { + direction = "recv"; + cap_str = cap_str.substr(std::string_view("recv_").size()); + } + + if (cap_str != "chunked" && cap_str != "notchunked" && cap_str != "chunked_optional" && cap_str != "notchunked_optional") + throw Exception(ErrorCodes::BAD_ARGUMENTS, "proto_caps option is incorrect ({})", proto_caps_str); + + if (direction.empty()) + { + proto_send_chunked = cap_str; + proto_recv_chunked = cap_str; + } + else + { + if (direction == "send") + proto_send_chunked = cap_str; + else + proto_recv_chunked = cap_str; + } + } + } + + Benchmark benchmark( options["concurrency"].as(), options["delay"].as(), @@ -673,6 +721,8 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv) options["database"].as(), options["user"].as(), options["password"].as(), + proto_send_chunked, + proto_recv_chunked, options["quota_key"].as(), options["stage"].as(), options.count("randomize"), diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 1d99d223ee9..25c94c56aa6 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -223,7 +223,7 @@ std::vector Client::loadWarningMessages() size_t rows = packet.block.rows(); for (size_t i = 0; i < rows; ++i) - messages.emplace_back(column[i].get()); + messages.emplace_back(column[i].safeGet()); } continue; diff --git a/programs/client/Client.h b/programs/client/Client.h index 7fdf77031ab..07a8e293b1a 100644 --- a/programs/client/Client.h +++ b/programs/client/Client.h @@ -11,7 +11,10 @@ class Client : public ClientApplicationBase public: using Arguments = ClientApplicationBase::Arguments; - Client() = default; + Client() + { + fuzzer = QueryFuzzer(randomSeed(), &std::cout, &std::cerr); + } void initialize(Poco::Util::Application & self) override; diff --git a/programs/client/clickhouse-client.xml b/programs/client/clickhouse-client.xml index c32b63413e9..6eb8976a6ef 100644 --- a/programs/client/clickhouse-client.xml +++ b/programs/client/clickhouse-client.xml @@ -38,6 +38,24 @@ {display_name} \e[1;31m:)\e[0m + + + + + + 9000 + + + diff --git a/programs/server/fuzzers/tcp_protocol_fuzzer.cpp b/programs/server/fuzzers/tcp_protocol_fuzzer.cpp index 950ea09669a..7cebdc2ad65 100644 --- a/programs/server/fuzzers/tcp_protocol_fuzzer.cpp +++ b/programs/server/fuzzers/tcp_protocol_fuzzer.cpp @@ -10,6 +10,7 @@ #include #include +#include #include @@ -25,6 +26,12 @@ static int64_t port = 9000; using namespace std::chrono_literals; +void on_exit() +{ + BaseDaemon::terminate(); + main_app.wait(); +} + extern "C" int LLVMFuzzerInitialize(int * argc, char ***argv) { @@ -60,6 +67,8 @@ int LLVMFuzzerInitialize(int * argc, char ***argv) exit(-1); } + atexit(on_exit); + return 0; } diff --git a/src/Access/AccessBackup.cpp b/src/Access/AccessBackup.cpp index 90effdab70f..d9ee89b45ce 100644 --- a/src/Access/AccessBackup.cpp +++ b/src/Access/AccessBackup.cpp @@ -93,7 +93,7 @@ namespace break; } - UUID id = parse(line); + UUID id = parse(line.substr(0, line.find('\t'))); line.clear(); String queries; diff --git a/src/Access/HTTPAuthClient.h b/src/Access/HTTPAuthClient.h index a8b56cf05a7..a1b97a729a3 100644 --- a/src/Access/HTTPAuthClient.h +++ b/src/Access/HTTPAuthClient.h @@ -82,7 +82,8 @@ public: Result authenticate(const String & user_name, const String & password) const { - Poco::Net::HTTPRequest request{Poco::Net::HTTPRequest::HTTP_GET, this->getURI().getPathAndQuery()}; + Poco::Net::HTTPRequest request{ + Poco::Net::HTTPRequest::HTTP_GET, this->getURI().getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1}; Poco::Net::HTTPBasicCredentials basic_credentials{user_name, password}; basic_credentials.authenticate(request); diff --git a/src/Access/RoleCache.cpp b/src/Access/RoleCache.cpp index 2d94df2eea5..cc1f1520b67 100644 --- a/src/Access/RoleCache.cpp +++ b/src/Access/RoleCache.cpp @@ -120,7 +120,7 @@ void RoleCache::collectEnabledRoles(EnabledRoles & enabled_roles, SubscriptionsO SubscriptionsOnRoles new_subscriptions_on_roles; new_subscriptions_on_roles.reserve(subscriptions_on_roles.size()); - auto get_role_function = [this, &subscriptions_on_roles](const UUID & id) TSA_NO_THREAD_SAFETY_ANALYSIS { return getRole(id, subscriptions_on_roles); }; + auto get_role_function = [this, &new_subscriptions_on_roles](const UUID & id) TSA_NO_THREAD_SAFETY_ANALYSIS { return getRole(id, new_subscriptions_on_roles); }; for (const auto & current_role : enabled_roles.params.current_roles) collectRoles(*new_info, skip_ids, get_role_function, current_role, true, false); diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp index 7034e6373b1..5cc9f725b46 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp @@ -780,12 +780,12 @@ AggregateFunctionPtr createAggregateFunctionGroupArray( if (type != Field::Types::Int64 && type != Field::Types::UInt64) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name); - if ((type == Field::Types::Int64 && parameters[0].get() < 0) || - (type == Field::Types::UInt64 && parameters[0].get() == 0)) + if ((type == Field::Types::Int64 && parameters[0].safeGet() < 0) || + (type == Field::Types::UInt64 && parameters[0].safeGet() == 0)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name); has_limit = true; - max_elems = parameters[0].get(); + max_elems = parameters[0].safeGet(); } else throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, @@ -816,11 +816,11 @@ AggregateFunctionPtr createAggregateFunctionGroupArraySample( if (type != Field::Types::Int64 && type != Field::Types::UInt64) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name); - if ((type == Field::Types::Int64 && parameters[i].get() < 0) || - (type == Field::Types::UInt64 && parameters[i].get() == 0)) + if ((type == Field::Types::Int64 && parameters[i].safeGet() < 0) || + (type == Field::Types::UInt64 && parameters[i].safeGet() == 0)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name); - return parameters[i].get(); + return parameters[i].safeGet(); }; UInt64 max_elems = get_parameter(0); diff --git a/src/AggregateFunctions/AggregateFunctionGroupArrayIntersect.cpp b/src/AggregateFunctions/AggregateFunctionGroupArrayIntersect.cpp index 38f2fcb9fb9..36d00b1d9ec 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArrayIntersect.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArrayIntersect.cpp @@ -83,16 +83,16 @@ public: if (version == 1) { for (size_t i = 0; i < arr_size; ++i) - set.insert(static_cast((*data_column)[offset + i].get())); + set.insert(static_cast((*data_column)[offset + i].safeGet())); } else if (!set.empty()) { typename State::Set new_set; for (size_t i = 0; i < arr_size; ++i) { - typename State::Set::LookupResult set_value = set.find(static_cast((*data_column)[offset + i].get())); + typename State::Set::LookupResult set_value = set.find(static_cast((*data_column)[offset + i].safeGet())); if (set_value != nullptr) - new_set.insert(static_cast((*data_column)[offset + i].get())); + new_set.insert(static_cast((*data_column)[offset + i].safeGet())); } set = std::move(new_set); } diff --git a/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.cpp b/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.cpp index 026b8d1956f..2c3ac7f883e 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.cpp @@ -269,12 +269,12 @@ AggregateFunctionPtr createAggregateFunctionMoving( if (type != Field::Types::Int64 && type != Field::Types::UInt64) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive integer", name); - if ((type == Field::Types::Int64 && parameters[0].get() <= 0) || - (type == Field::Types::UInt64 && parameters[0].get() == 0)) + if ((type == Field::Types::Int64 && parameters[0].safeGet() <= 0) || + (type == Field::Types::UInt64 && parameters[0].safeGet() == 0)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive integer", name); limit_size = true; - max_elems = parameters[0].get(); + max_elems = parameters[0].safeGet(); } else throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, diff --git a/src/AggregateFunctions/AggregateFunctionGroupArraySorted.cpp b/src/AggregateFunctions/AggregateFunctionGroupArraySorted.cpp index d41d743e17a..27043ed6aa6 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArraySorted.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArraySorted.cpp @@ -397,11 +397,11 @@ AggregateFunctionPtr createAggregateFunctionGroupArray( if (type != Field::Types::Int64 && type != Field::Types::UInt64) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name); - if ((type == Field::Types::Int64 && parameters[0].get() < 0) || - (type == Field::Types::UInt64 && parameters[0].get() == 0)) + if ((type == Field::Types::Int64 && parameters[0].safeGet() < 0) || + (type == Field::Types::UInt64 && parameters[0].safeGet() == 0)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name); - max_elems = parameters[0].get(); + max_elems = parameters[0].safeGet(); } else throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, diff --git a/src/AggregateFunctions/AggregateFunctionGroupConcat.cpp b/src/AggregateFunctions/AggregateFunctionGroupConcat.cpp index 5494ef74705..636ac80e350 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupConcat.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupConcat.cpp @@ -247,7 +247,7 @@ AggregateFunctionPtr createAggregateFunctionGroupConcat( if (type != Field::Types::String) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First parameter for aggregate function {} should be string", name); - delimiter = parameters[0].get(); + delimiter = parameters[0].safeGet(); } if (parameters.size() == 2) { @@ -256,12 +256,12 @@ AggregateFunctionPtr createAggregateFunctionGroupConcat( if (type != Field::Types::Int64 && type != Field::Types::UInt64) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second parameter for aggregate function {} should be a positive number", name); - if ((type == Field::Types::Int64 && parameters[1].get() <= 0) || - (type == Field::Types::UInt64 && parameters[1].get() == 0)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second parameter for aggregate function {} should be a positive number, got: {}", name, parameters[1].get()); + if ((type == Field::Types::Int64 && parameters[1].safeGet() <= 0) || + (type == Field::Types::UInt64 && parameters[1].safeGet() == 0)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second parameter for aggregate function {} should be a positive number, got: {}", name, parameters[1].safeGet()); has_limit = true; - limit = parameters[1].get(); + limit = parameters[1].safeGet(); } if (has_limit) diff --git a/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp b/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp index 7b4300b3568..5cbf449c946 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp @@ -323,12 +323,12 @@ AggregateFunctionPtr createAggregateFunctionGroupUniqArray( if (type != Field::Types::Int64 && type != Field::Types::UInt64) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name); - if ((type == Field::Types::Int64 && parameters[0].get() < 0) || - (type == Field::Types::UInt64 && parameters[0].get() == 0)) + if ((type == Field::Types::Int64 && parameters[0].safeGet() < 0) || + (type == Field::Types::UInt64 && parameters[0].safeGet() == 0)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name); limit_size = true; - max_elems = parameters[0].get(); + max_elems = parameters[0].safeGet(); } else throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, diff --git a/src/AggregateFunctions/AggregateFunctionKolmogorovSmirnovTest.cpp b/src/AggregateFunctions/AggregateFunctionKolmogorovSmirnovTest.cpp index 04eebe9f485..28e8d37b8c8 100644 --- a/src/AggregateFunctions/AggregateFunctionKolmogorovSmirnovTest.cpp +++ b/src/AggregateFunctions/AggregateFunctionKolmogorovSmirnovTest.cpp @@ -238,7 +238,7 @@ public: if (params[0].getType() != Field::Types::String) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} require first parameter to be a String", getName()); - const auto & param = params[0].get(); + const auto & param = params[0].safeGet(); if (param == "two-sided") alternative = Alternative::TwoSided; else if (param == "less") @@ -255,7 +255,7 @@ public: if (params[1].getType() != Field::Types::String) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} require second parameter to be a String", getName()); - method = params[1].get(); + method = params[1].safeGet(); if (method != "auto" && method != "exact" && method != "asymp" && method != "asymptotic") throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown method in aggregate function {}. " "It must be one of: 'auto', 'exact', 'asymp' (or 'asymptotic')", getName()); diff --git a/src/AggregateFunctions/AggregateFunctionLargestTriangleThreeBuckets.cpp b/src/AggregateFunctions/AggregateFunctionLargestTriangleThreeBuckets.cpp index 6d1e3c0f64b..813b13b6f7b 100644 --- a/src/AggregateFunctions/AggregateFunctionLargestTriangleThreeBuckets.cpp +++ b/src/AggregateFunctions/AggregateFunctionLargestTriangleThreeBuckets.cpp @@ -181,7 +181,7 @@ public: throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} require first parameter to be a UInt64", getName()); - total_buckets = params[0].get(); + total_buckets = params[0].safeGet(); this->x_type = WhichDataType(arguments[0]).idx; this->y_type = WhichDataType(arguments[1]).idx; diff --git a/src/AggregateFunctions/AggregateFunctionMannWhitney.cpp b/src/AggregateFunctions/AggregateFunctionMannWhitney.cpp index f088737c340..ecd848f5af3 100644 --- a/src/AggregateFunctions/AggregateFunctionMannWhitney.cpp +++ b/src/AggregateFunctions/AggregateFunctionMannWhitney.cpp @@ -114,7 +114,7 @@ private: { if (ind < first.size()) return first[ind]; - return second[ind % first.size()]; + return second[ind - first.size()]; } size_t size() const @@ -152,7 +152,7 @@ public: if (params[0].getType() != Field::Types::String) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} require first parameter to be a String", getName()); - const auto & param = params[0].get(); + const auto & param = params[0].safeGet(); if (param == "two-sided") alternative = Alternative::TwoSided; else if (param == "less") @@ -169,7 +169,7 @@ public: if (params[1].getType() != Field::Types::UInt64) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} require second parameter to be a UInt64", getName()); - continuity_correction = static_cast(params[1].get()); + continuity_correction = static_cast(params[1].safeGet()); } String getName() const override diff --git a/src/AggregateFunctions/AggregateFunctionQuantile.h b/src/AggregateFunctions/AggregateFunctionQuantile.h index 127dc06b642..423fd4bc569 100644 --- a/src/AggregateFunctions/AggregateFunctionQuantile.h +++ b/src/AggregateFunctions/AggregateFunctionQuantile.h @@ -117,7 +117,7 @@ public: throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} requires relative accuracy parameter with Float64 type", getName()); - relative_accuracy = relative_accuracy_field.get(); + relative_accuracy = relative_accuracy_field.safeGet(); if (relative_accuracy <= 0 || relative_accuracy >= 1 || isNaN(relative_accuracy)) throw Exception( @@ -147,9 +147,9 @@ public: ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} requires accuracy parameter with integer type", getName()); if (accuracy_field.getType() == Field::Types::Int64) - accuracy = accuracy_field.get(); + accuracy = accuracy_field.safeGet(); else - accuracy = accuracy_field.get(); + accuracy = accuracy_field.safeGet(); if (accuracy <= 0) throw Exception( diff --git a/src/AggregateFunctions/AggregateFunctionSumMap.cpp b/src/AggregateFunctions/AggregateFunctionSumMap.cpp index 666136a91b7..9a94c3dfe1a 100644 --- a/src/AggregateFunctions/AggregateFunctionSumMap.cpp +++ b/src/AggregateFunctions/AggregateFunctionSumMap.cpp @@ -300,12 +300,12 @@ public: /// Compatibility with previous versions. if (value.getType() == Field::Types::Decimal32) { - auto source = value.get>(); + auto source = value.safeGet>(); value = DecimalField(source.getValue(), source.getScale()); } else if (value.getType() == Field::Types::Decimal64) { - auto source = value.get>(); + auto source = value.safeGet>(); value = DecimalField(source.getValue(), source.getScale()); } @@ -355,7 +355,7 @@ public: /// Compatibility with previous versions. if (value.getType() == Field::Types::Decimal128) { - auto source = value.get>(); + auto source = value.safeGet>(); WhichDataType value_type(values_types[col_idx]); if (value_type.isDecimal32()) { @@ -560,7 +560,7 @@ private: template bool compareImpl(FieldType & x) const { - auto val = rhs.get(); + auto val = rhs.safeGet(); if (val > x) { x = val; @@ -600,7 +600,7 @@ private: template bool compareImpl(FieldType & x) const { - auto val = rhs.get(); + auto val = rhs.safeGet(); if (val < x) { x = val; diff --git a/src/AggregateFunctions/fuzzers/CMakeLists.txt b/src/AggregateFunctions/fuzzers/CMakeLists.txt index 3ff7c86a00c..1ce0c52feb7 100644 --- a/src/AggregateFunctions/fuzzers/CMakeLists.txt +++ b/src/AggregateFunctions/fuzzers/CMakeLists.txt @@ -1,2 +1,2 @@ clickhouse_add_executable(aggregate_function_state_deserialization_fuzzer aggregate_function_state_deserialization_fuzzer.cpp ${SRCS}) -target_link_libraries(aggregate_function_state_deserialization_fuzzer PRIVATE dbms clickhouse_aggregate_functions clickhouse_functions) +target_link_libraries(aggregate_function_state_deserialization_fuzzer PRIVATE clickhouse_functions clickhouse_aggregate_functions) diff --git a/src/Analyzer/ConstantNode.cpp b/src/Analyzer/ConstantNode.cpp index c65090f5b55..3a99ad08ad8 100644 --- a/src/Analyzer/ConstantNode.cpp +++ b/src/Analyzer/ConstantNode.cpp @@ -177,9 +177,10 @@ ASTPtr ConstantNode::toASTImpl(const ConvertToASTOptions & options) const * It could also lead to ambiguous parsing because we don't know if the string literal represents a date or a Decimal64 literal. * For this reason, we use a string literal representing a date instead of a Decimal64 literal. */ - if (WhichDataType(constant_value_type->getTypeId()).isDateTime64()) + const auto & constant_value_end_type = removeNullable(constant_value_type); /// if Nullable + if (WhichDataType(constant_value_end_type->getTypeId()).isDateTime64()) { - const auto * date_time_type = typeid_cast(constant_value_type.get()); + const auto * date_time_type = typeid_cast(constant_value_end_type.get()); DecimalField decimal_value; if (constant_value_literal.tryGet>(decimal_value)) { diff --git a/src/Analyzer/FunctionNode.cpp b/src/Analyzer/FunctionNode.cpp index e98b04fe9a9..8e4e0725a2d 100644 --- a/src/Analyzer/FunctionNode.cpp +++ b/src/Analyzer/FunctionNode.cpp @@ -242,7 +242,8 @@ ASTPtr FunctionNode::toASTImpl(const ConvertToASTOptions & options) const /// Avoid cast for `IN tuple(...)` expression. /// Tuples could be quite big, and adding a type may significantly increase query size. /// It should be safe because set type for `column IN tuple` is deduced from `column` type. - if (isNameOfInFunction(function_name) && argument_nodes.size() > 1 && argument_nodes[1]->getNodeType() == QueryTreeNodeType::CONSTANT) + if (isNameOfInFunction(function_name) && argument_nodes.size() > 1 && argument_nodes[1]->getNodeType() == QueryTreeNodeType::CONSTANT + && !static_cast(argument_nodes[1].get())->hasSourceExpression()) new_options.add_cast_for_constants = false; const auto & parameters = getParameters(); diff --git a/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp b/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp index 76dc8ab94b4..f31920f8e33 100644 --- a/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp +++ b/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp @@ -137,7 +137,7 @@ private: if (constant_node_value.getType() != Field::Types::Which::Tuple) return {}; - const auto & constant_tuple = constant_node_value.get(); + const auto & constant_tuple = constant_node_value.safeGet(); const auto & function_arguments_nodes = function_node_typed.getArguments().getNodes(); size_t function_arguments_nodes_size = function_arguments_nodes.size(); diff --git a/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp b/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp index 2b2ac95d7b9..6c4ce789993 100644 --- a/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp +++ b/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp @@ -89,7 +89,7 @@ public: if (!pattern || !isString(pattern->getResultType())) continue; - auto regexp = likePatternToRegexp(pattern->getValue().get()); + auto regexp = likePatternToRegexp(pattern->getValue().safeGet()); /// Case insensitive. Works with UTF-8 as well. if (is_ilike) regexp = "(?i)" + regexp; diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index b968f43c6a6..1fc3eec6833 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -68,10 +68,10 @@ void optimizeFunctionEmpty(QueryTreeNodePtr &, FunctionNode & function_node, Col String getSubcolumnNameForElement(const Field & value, const DataTypeTuple & data_type_tuple) { if (value.getType() == Field::Types::String) - return value.get(); + return value.safeGet(); if (value.getType() == Field::Types::UInt64) - return data_type_tuple.getNameByPosition(value.get()); + return data_type_tuple.getNameByPosition(value.safeGet()); return ""; } @@ -79,7 +79,7 @@ String getSubcolumnNameForElement(const Field & value, const DataTypeTuple & dat String getSubcolumnNameForElement(const Field & value, const DataTypeVariant &) { if (value.getType() == Field::Types::String) - return value.get(); + return value.safeGet(); return ""; } diff --git a/src/Analyzer/Passes/FuseFunctionsPass.cpp b/src/Analyzer/Passes/FuseFunctionsPass.cpp index 0175e304a2b..f3b109a10ed 100644 --- a/src/Analyzer/Passes/FuseFunctionsPass.cpp +++ b/src/Analyzer/Passes/FuseFunctionsPass.cpp @@ -187,7 +187,7 @@ FunctionNodePtr createFusedQuantilesNode(std::vector & nodes /// Sort nodes and parameters in ascending order of quantile level std::vector permutation(nodes.size()); iota(permutation.data(), permutation.size(), size_t(0)); - std::sort(permutation.begin(), permutation.end(), [&](size_t i, size_t j) { return parameters[i].get() < parameters[j].get(); }); + std::sort(permutation.begin(), permutation.end(), [&](size_t i, size_t j) { return parameters[i].safeGet() < parameters[j].safeGet(); }); std::vector new_nodes; new_nodes.reserve(permutation.size()); diff --git a/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp b/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp index d966f129d08..f81327c5d55 100644 --- a/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp +++ b/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp @@ -134,8 +134,8 @@ public: return; std::set string_values; - string_values.insert(first_literal->getValue().get()); - string_values.insert(second_literal->getValue().get()); + string_values.insert(first_literal->getValue().safeGet()); + string_values.insert(second_literal->getValue().safeGet()); changeIfArguments(*function_if_node, string_values, context); wrapIntoToString(*function_node, std::move(modified_if_node), context); @@ -163,7 +163,7 @@ public: if (!isArray(literal_to->getResultType()) || !isString(literal_default->getResultType())) return; - auto array_to = literal_to->getValue().get(); + auto array_to = literal_to->getValue().safeGet(); if (array_to.empty()) return; @@ -178,9 +178,9 @@ public: std::set string_values; for (const auto & value : array_to) - string_values.insert(value.get()); + string_values.insert(value.safeGet()); - string_values.insert(literal_default->getValue().get()); + string_values.insert(literal_default->getValue().safeGet()); changeTransformArguments(*function_modified_transform_node, string_values, context); wrapIntoToString(*function_node, std::move(modified_transform_node), context); diff --git a/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp b/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp index 3a8b6e75d40..02f1c93ea7f 100644 --- a/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp +++ b/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp @@ -54,7 +54,7 @@ public: } else if (function_node->getFunctionName() == "sum" && first_argument_constant_literal.getType() == Field::Types::UInt64 && - first_argument_constant_literal.get() == 1) + first_argument_constant_literal.safeGet() == 1) { function_node->getArguments().getNodes().clear(); resolveAggregateFunctionNodeByName(*function_node, "count"); diff --git a/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp b/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp index feb8bcc792d..0f33c302265 100644 --- a/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp +++ b/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp @@ -143,13 +143,13 @@ private: const auto & column_type = column_node_typed.getColumnType().get(); if (isDateOrDate32(column_type)) { - start_date_or_date_time = date_lut.dateToString(range.first.get()); - end_date_or_date_time = date_lut.dateToString(range.second.get()); + start_date_or_date_time = date_lut.dateToString(range.first.safeGet()); + end_date_or_date_time = date_lut.dateToString(range.second.safeGet()); } else if (isDateTime(column_type) || isDateTime64(column_type)) { - start_date_or_date_time = date_lut.timeToString(range.first.get()); - end_date_or_date_time = date_lut.timeToString(range.second.get()); + start_date_or_date_time = date_lut.timeToString(range.first.safeGet()); + end_date_or_date_time = date_lut.timeToString(range.second.safeGet()); } else [[unlikely]] return {}; diff --git a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp index a48e88132a6..091061ceb81 100644 --- a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp +++ b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp @@ -60,7 +60,7 @@ public: { const auto & second_const_value = second_const_node->getValue(); if (second_const_value.isNull() - || (lower_name == "sum" && isInt64OrUInt64FieldType(second_const_value.getType()) && second_const_value.get() == 0 + || (lower_name == "sum" && isInt64OrUInt64FieldType(second_const_value.getType()) && second_const_value.safeGet() == 0 && !if_node->getResultType()->isNullable())) { /// avg(if(cond, a, null)) -> avgIf(a::ResultTypeIf, cond) @@ -89,7 +89,7 @@ public: { const auto & first_const_value = first_const_node->getValue(); if (first_const_value.isNull() - || (lower_name == "sum" && isInt64OrUInt64FieldType(first_const_value.getType()) && first_const_value.get() == 0 + || (lower_name == "sum" && isInt64OrUInt64FieldType(first_const_value.getType()) && first_const_value.safeGet() == 0 && !if_node->getResultType()->isNullable())) { /// avg(if(cond, null, a) -> avgIf(a::ResultTypeIf, !cond)) diff --git a/src/Analyzer/Passes/SumIfToCountIfPass.cpp b/src/Analyzer/Passes/SumIfToCountIfPass.cpp index 1524629dc81..a987ced497a 100644 --- a/src/Analyzer/Passes/SumIfToCountIfPass.cpp +++ b/src/Analyzer/Passes/SumIfToCountIfPass.cpp @@ -66,7 +66,7 @@ public: resolveAggregateFunctionNodeByName(*function_node, "countIf"); - if (constant_value_literal.get() != 1) + if (constant_value_literal.safeGet() != 1) { /// Rewrite `sumIf(123, cond)` into `123 * countIf(cond)` node = getMultiplyFunction(std::move(multiplier_node), node); @@ -105,8 +105,8 @@ public: const auto & if_true_condition_constant_value_literal = if_true_condition_constant_node->getValue(); const auto & if_false_condition_constant_value_literal = if_false_condition_constant_node->getValue(); - auto if_true_condition_value = if_true_condition_constant_value_literal.get(); - auto if_false_condition_value = if_false_condition_constant_value_literal.get(); + auto if_true_condition_value = if_true_condition_constant_value_literal.safeGet(); + auto if_false_condition_value = if_false_condition_constant_value_literal.safeGet(); if (if_false_condition_value == 0) { diff --git a/src/Analyzer/QueryTreeBuilder.cpp b/src/Analyzer/QueryTreeBuilder.cpp index fb41826929f..9754897d54d 100644 --- a/src/Analyzer/QueryTreeBuilder.cpp +++ b/src/Analyzer/QueryTreeBuilder.cpp @@ -471,7 +471,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildSortList(const ASTPtr & order_by_express std::shared_ptr collator; if (order_by_element.getCollation()) - collator = std::make_shared(order_by_element.getCollation()->as().value.get()); + collator = std::make_shared(order_by_element.getCollation()->as().value.safeGet()); const auto & sort_expression_ast = order_by_element.children.at(0); auto sort_expression = buildExpression(sort_expression_ast, context); diff --git a/src/Analyzer/Resolve/IdentifierResolver.cpp b/src/Analyzer/Resolve/IdentifierResolver.cpp index 447bf825836..80e7d1e4445 100644 --- a/src/Analyzer/Resolve/IdentifierResolver.cpp +++ b/src/Analyzer/Resolve/IdentifierResolver.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include @@ -452,10 +452,10 @@ QueryTreeNodePtr IdentifierResolver::tryResolveIdentifierFromCompoundExpression( if (auto * column = compound_expression->as()) { const DataTypePtr & column_type = column->getColumn().getTypeInStorage(); - if (column_type->getTypeId() == TypeIndex::Object) + if (column_type->getTypeId() == TypeIndex::ObjectDeprecated) { - const auto * object_type = checkAndGetDataType(column_type.get()); - if (object_type->getSchemaFormat() == "json" && object_type->hasNullableSubcolumns()) + const auto & object_type = checkAndGetDataType(*column_type); + if (object_type.getSchemaFormat() == "json" && object_type.hasNullableSubcolumns()) { QueryTreeNodePtr constant_node_null = std::make_shared(Field()); return constant_node_null; @@ -692,7 +692,7 @@ QueryTreeNodePtr IdentifierResolver::tryResolveIdentifierFromStorage( result_column_node = it->second; } /// Check if it's a dynamic subcolumn - else + else if (table_expression_data.supports_subcolumns) { auto [column_name, dynamic_subcolumn_name] = Nested::splitName(identifier_full_name); auto jt = table_expression_data.column_name_to_column_node.find(column_name); @@ -1000,7 +1000,6 @@ QueryTreeNodePtr IdentifierResolver::tryResolveIdentifierFromJoin(const Identifi if (!join_node_in_resolve_process && from_join_node.isUsingJoinExpression()) { auto & join_using_list = from_join_node.getJoinExpression()->as(); - for (auto & join_using_node : join_using_list.getNodes()) { auto & column_node = join_using_node->as(); @@ -1273,7 +1272,7 @@ QueryTreeNodePtr IdentifierResolver::matchArrayJoinSubcolumns( const auto & constant_node_value = constant_node.getValue(); if (constant_node_value.getType() == Field::Types::String) { - array_join_subcolumn_prefix = constant_node_value.get() + "."; + array_join_subcolumn_prefix = constant_node_value.safeGet() + "."; array_join_parent_column = argument_nodes.at(0).get(); } } @@ -1287,7 +1286,7 @@ QueryTreeNodePtr IdentifierResolver::matchArrayJoinSubcolumns( if (!second_argument || second_argument->getValue().getType() != Field::Types::String) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected constant string as second argument of getSubcolumn function {}", resolved_function->dumpTree()); - const auto & resolved_subcolumn_path = second_argument->getValue().get(); + const auto & resolved_subcolumn_path = second_argument->getValue().safeGet(); if (!startsWith(resolved_subcolumn_path, array_join_subcolumn_prefix)) return {}; @@ -1331,7 +1330,7 @@ QueryTreeNodePtr IdentifierResolver::tryResolveExpressionFromArrayJoinExpression size_t nested_function_arguments_size = nested_function_arguments.size(); const auto & nested_keys_names_constant_node = nested_function_arguments[0]->as(); - const auto & nested_keys_names = nested_keys_names_constant_node.getValue().get(); + const auto & nested_keys_names = nested_keys_names_constant_node.getValue().safeGet(); size_t nested_keys_names_size = nested_keys_names.size(); if (nested_keys_names_size == nested_function_arguments_size - 1) @@ -1344,7 +1343,7 @@ QueryTreeNodePtr IdentifierResolver::tryResolveExpressionFromArrayJoinExpression auto array_join_column = std::make_shared(array_join_column_expression_typed.getColumn(), array_join_column_expression_typed.getColumnSource()); - const auto & nested_key_name = nested_keys_names[i - 1].get(); + const auto & nested_key_name = nested_keys_names[i - 1].safeGet(); Identifier nested_identifier = Identifier(nested_key_name); array_join_resolved_expression = wrapExpressionNodeInTupleElement(array_join_column, nested_identifier, scope.context); break; diff --git a/src/Analyzer/Resolve/QueryAnalyzer.cpp b/src/Analyzer/Resolve/QueryAnalyzer.cpp index e04b93314c5..a18c2901a58 100644 --- a/src/Analyzer/Resolve/QueryAnalyzer.cpp +++ b/src/Analyzer/Resolve/QueryAnalyzer.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include #include @@ -748,11 +748,11 @@ void QueryAnalyzer::replaceNodesWithPositionalArguments(QueryTreeNodePtr & node_ UInt64 pos; if (constant_node->getValue().getType() == Field::Types::UInt64) { - pos = constant_node->getValue().get(); + pos = constant_node->getValue().safeGet(); } else // Int64 { - auto value = constant_node->getValue().get(); + auto value = constant_node->getValue().safeGet(); if (value > 0) pos = value; else @@ -4379,7 +4379,10 @@ void QueryAnalyzer::initializeTableExpressionData(const QueryTreeNodePtr & table auto get_column_options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects().withVirtuals(); if (storage_snapshot->storage.supportsSubcolumns()) + { get_column_options.withSubcolumns(); + table_expression_data.supports_subcolumns = true; + } auto column_names_and_types = storage_snapshot->getColumns(get_column_options); table_expression_data.column_names_and_types = NamesAndTypes(column_names_and_types.begin(), column_names_and_types.end()); diff --git a/src/Analyzer/Resolve/TableExpressionData.h b/src/Analyzer/Resolve/TableExpressionData.h index 18cbfa32366..6770672d0c2 100644 --- a/src/Analyzer/Resolve/TableExpressionData.h +++ b/src/Analyzer/Resolve/TableExpressionData.h @@ -36,6 +36,7 @@ struct AnalysisTableExpressionData std::string database_name; std::string table_name; bool should_qualify_columns = true; + bool supports_subcolumns = false; NamesAndTypes column_names_and_types; ColumnNameToColumnNodeMap column_name_to_column_node; std::unordered_set subcolumn_names; /// Subset columns that are subcolumns of other columns diff --git a/src/Analyzer/SetUtils.cpp b/src/Analyzer/SetUtils.cpp index e7d1a22f45a..59a243b27f3 100644 --- a/src/Analyzer/SetUtils.cpp +++ b/src/Analyzer/SetUtils.cpp @@ -99,7 +99,7 @@ Block createBlockFromCollection(const Collection & collection, const DataTypes& "Invalid type in set. Expected tuple, got {}", value.getTypeName()); - const auto & tuple = value.template get(); + const auto & tuple = value.template safeGet(); const DataTypePtr & value_type = value_types[collection_index]; const DataTypes & tuple_value_type = typeid_cast(value_type.get())->getElements(); @@ -175,15 +175,15 @@ Block getSetElementsForConstantValue(const DataTypePtr & expression_type, const if (rhs_which_type.isArray()) { const DataTypeArray * value_array_type = assert_cast(value_type.get()); - size_t value_array_size = value.get().size(); + size_t value_array_size = value.safeGet().size(); DataTypes value_types(value_array_size, value_array_type->getNestedType()); - result_block = createBlockFromCollection(value.get(), value_types, set_element_types, transform_null_in); + result_block = createBlockFromCollection(value.safeGet(), value_types, set_element_types, transform_null_in); } else if (rhs_which_type.isTuple()) { const DataTypeTuple * value_tuple_type = assert_cast(value_type.get()); const DataTypes & value_types = value_tuple_type->getElements(); - result_block = createBlockFromCollection(value.get(), value_types, set_element_types, transform_null_in); + result_block = createBlockFromCollection(value.safeGet(), value_types, set_element_types, transform_null_in); } else throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, diff --git a/src/Backups/BackupSettings.cpp b/src/Backups/BackupSettings.cpp index 37ddd344001..e982a806b7c 100644 --- a/src/Backups/BackupSettings.cpp +++ b/src/Backups/BackupSettings.cpp @@ -126,7 +126,7 @@ std::vector BackupSettings::Util::clusterHostIDsFromAST(const IAST & as throw Exception( ErrorCodes::CANNOT_PARSE_BACKUP_SETTINGS, "Setting cluster_host_ids has wrong format, must be array of arrays of string literals"); - const auto & replicas = array_of_replicas->value.get(); + const auto & replicas = array_of_replicas->value.safeGet(); res[i].resize(replicas.size()); for (size_t j = 0; j != replicas.size(); ++j) { @@ -135,7 +135,7 @@ std::vector BackupSettings::Util::clusterHostIDsFromAST(const IAST & as throw Exception( ErrorCodes::CANNOT_PARSE_BACKUP_SETTINGS, "Setting cluster_host_ids has wrong format, must be array of arrays of string literals"); - res[i][j] = replica.get(); + res[i][j] = replica.safeGet(); } } } diff --git a/src/Backups/DDLAdjustingForBackupVisitor.cpp b/src/Backups/DDLAdjustingForBackupVisitor.cpp index 910831195a3..4dcbdcc1617 100644 --- a/src/Backups/DDLAdjustingForBackupVisitor.cpp +++ b/src/Backups/DDLAdjustingForBackupVisitor.cpp @@ -46,8 +46,8 @@ namespace if (zookeeper_path_ast && (zookeeper_path_ast->value.getType() == Field::Types::String) && replica_name_ast && (replica_name_ast->value.getType() == Field::Types::String)) { - String & zookeeper_path_arg = zookeeper_path_ast->value.get(); - String & replica_name_arg = replica_name_ast->value.get(); + String & zookeeper_path_arg = zookeeper_path_ast->value.safeGet(); + String & replica_name_arg = replica_name_ast->value.safeGet(); if (create.uuid != UUIDHelpers::Nil) { String table_uuid_str = toString(create.uuid); diff --git a/src/Backups/RestoreSettings.cpp b/src/Backups/RestoreSettings.cpp index a974fc11d00..8e60e8d129e 100644 --- a/src/Backups/RestoreSettings.cpp +++ b/src/Backups/RestoreSettings.cpp @@ -31,7 +31,7 @@ namespace { if (field.getType() == Field::Types::String) { - const String & str = field.get(); + const String & str = field.safeGet(); if (str == "1" || boost::iequals(str, "true") || boost::iequals(str, "create")) { value = RestoreTableCreationMode::kCreate; @@ -54,7 +54,7 @@ namespace if (field.getType() == Field::Types::UInt64) { - UInt64 number = field.get(); + UInt64 number = field.safeGet(); if (number == 1) { value = RestoreTableCreationMode::kCreate; @@ -95,7 +95,7 @@ namespace { if (field.getType() == Field::Types::String) { - const String & str = field.get(); + const String & str = field.safeGet(); if (str == "1" || boost::iequals(str, "true") || boost::iequals(str, "create")) { value = RestoreAccessCreationMode::kCreate; @@ -118,7 +118,7 @@ namespace if (field.getType() == Field::Types::UInt64) { - UInt64 number = field.get(); + UInt64 number = field.safeGet(); if (number == 1) { value = RestoreAccessCreationMode::kCreate; diff --git a/src/Backups/SettingsFieldOptionalString.cpp b/src/Backups/SettingsFieldOptionalString.cpp index 573fd1e052c..684407a533d 100644 --- a/src/Backups/SettingsFieldOptionalString.cpp +++ b/src/Backups/SettingsFieldOptionalString.cpp @@ -19,7 +19,7 @@ SettingFieldOptionalString::SettingFieldOptionalString(const Field & field) if (field.getType() == Field::Types::String) { - value = field.get(); + value = field.safeGet(); return; } diff --git a/src/Backups/SettingsFieldOptionalUUID.cpp b/src/Backups/SettingsFieldOptionalUUID.cpp index 3f14608b206..0011f7f1073 100644 --- a/src/Backups/SettingsFieldOptionalUUID.cpp +++ b/src/Backups/SettingsFieldOptionalUUID.cpp @@ -22,7 +22,7 @@ namespace ErrorCodes if (field.getType() == Field::Types::String) { - const String & str = field.get(); + const String & str = field.safeGet(); if (str.empty()) { value = std::nullopt; diff --git a/src/Backups/WithRetries.cpp b/src/Backups/WithRetries.cpp index 181e6331ac9..9f22085f5a9 100644 --- a/src/Backups/WithRetries.cpp +++ b/src/Backups/WithRetries.cpp @@ -68,13 +68,19 @@ const WithRetries::KeeperSettings & WithRetries::getKeeperSettings() const WithRetries::FaultyKeeper WithRetries::getFaultyZooKeeper() const { - /// We need to create new instance of ZooKeeperWithFaultInjection each time a copy a pointer to ZooKeeper client there + zkutil::ZooKeeperPtr current_zookeeper; + { + std::lock_guard lock(zookeeper_mutex); + current_zookeeper = zookeeper; + } + + /// We need to create new instance of ZooKeeperWithFaultInjection each time and copy a pointer to ZooKeeper client there /// The reason is that ZooKeeperWithFaultInjection may reset the underlying pointer and there could be a race condition /// when the same object is used from multiple threads. auto faulty_zookeeper = ZooKeeperWithFaultInjection::createInstance( settings.keeper_fault_injection_probability, settings.keeper_fault_injection_seed, - zookeeper, + current_zookeeper, log->name(), log); diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8c133971785..95e431b54be 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,4 +1,4 @@ -add_compile_options($<$,$>:${COVERAGE_FLAGS}>) +add_compile_options("$<$,$>:${COVERAGE_FLAGS}>") if (USE_INCLUDE_WHAT_YOU_USE) set (CMAKE_CXX_INCLUDE_WHAT_YOU_USE ${IWYU_PATH}) @@ -78,6 +78,7 @@ add_headers_and_sources(clickhouse_common_io Common/Scheduler) add_headers_and_sources(clickhouse_common_io Common/Scheduler/Nodes) add_headers_and_sources(clickhouse_common_io IO) add_headers_and_sources(clickhouse_common_io IO/Archives) +add_headers_and_sources(clickhouse_common_io IO/Protobuf) add_headers_and_sources(clickhouse_common_io IO/S3) add_headers_and_sources(clickhouse_common_io IO/AzureBlobStorage) list (REMOVE_ITEM clickhouse_common_io_sources Common/malloc.cpp Common/new_delete.cpp) @@ -225,6 +226,7 @@ add_object_library(clickhouse_storages_liveview Storages/LiveView) add_object_library(clickhouse_storages_windowview Storages/WindowView) add_object_library(clickhouse_storages_s3queue Storages/ObjectStorageQueue) add_object_library(clickhouse_storages_materializedview Storages/MaterializedView) +add_object_library(clickhouse_storages_time_series Storages/TimeSeries) add_object_library(clickhouse_client Client) # Always compile this file with the highest possible level of optimizations, even in Debug builds. # https://github.com/ClickHouse/ClickHouse/issues/65745 @@ -351,8 +353,8 @@ target_link_libraries(clickhouse_common_io Poco::Foundation ) -if (TARGET ch_contrib::fiu) - target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::fiu) +if (TARGET ch_contrib::libfiu) + target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::libfiu) endif() if (TARGET ch_contrib::cpuid) @@ -469,6 +471,7 @@ dbms_target_link_libraries (PUBLIC ch_contrib::sparsehash) if (TARGET ch_contrib::protobuf) dbms_target_link_libraries (PRIVATE ch_contrib::protobuf) + target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::protobuf) endif () if (TARGET clickhouse_grpc_protos) @@ -553,14 +556,13 @@ target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::lz4) if (TARGET ch_contrib::qpl) dbms_target_link_libraries(PUBLIC ch_contrib::qpl) + target_link_libraries (clickhouse_compression PUBLIC ch_contrib::qpl) + target_link_libraries (clickhouse_compression PUBLIC ch_contrib::accel-config) endif () -if (TARGET ch_contrib::accel-config) - dbms_target_link_libraries(PUBLIC ch_contrib::accel-config) -endif () - -if (TARGET ch_contrib::qatzstd_plugin) +if (TARGET ch_contrib::accel-config AND TARGET ch_contrib::qatzstd_plugin) dbms_target_link_libraries(PUBLIC ch_contrib::qatzstd_plugin) + dbms_target_link_libraries(PUBLIC ch_contrib::accel-config) target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::qatzstd_plugin) endif () @@ -599,10 +601,6 @@ endif() dbms_target_link_libraries(PUBLIC ch_contrib::consistent_hashing) -if (TARGET ch_contrib::annoy) - dbms_target_link_libraries(PUBLIC ch_contrib::annoy) -endif() - if (TARGET ch_contrib::usearch) dbms_target_link_libraries(PUBLIC ch_contrib::usearch) endif() diff --git a/src/Client/ClientApplicationBase.cpp b/src/Client/ClientApplicationBase.cpp index 9f133616d2e..0649aa5f5d7 100644 --- a/src/Client/ClientApplicationBase.cpp +++ b/src/Client/ClientApplicationBase.cpp @@ -158,6 +158,8 @@ void ClientApplicationBase::init(int argc, char ** argv) ("config-file,C", po::value(), "config-file path") + ("proto_caps", po::value(), "enable/disable chunked protocol: chunked_optional, notchunked, notchunked_optional, send_chunked, send_chunked_optional, send_notchunked, send_notchunked_optional, recv_chunked, recv_chunked_optional, recv_notchunked, recv_notchunked_optional") + ("query,q", po::value>()->multitoken(), R"(Query. Can be specified multiple times (--query "SELECT 1" --query "SELECT 2") or once with multiple comma-separated queries (--query "SELECT 1; SELECT 2;"). In the latter case, INSERT queries with non-VALUE format must be separated by empty lines.)") ("queries-file", po::value>()->multitoken(), "file path with queries to execute; multiple files can be specified (--queries-file file1 file2...)") ("multiquery,n", "Obsolete, does nothing") @@ -200,8 +202,6 @@ void ClientApplicationBase::init(int argc, char ** argv) ("pager", po::value(), "Pipe all output into this command (less or similar)") ("max_memory_usage_in_client", po::value(), "Set memory limit in client/local server") - ("fuzzer-args", po::value(), "Command line arguments for the LLVM's libFuzzer driver. Only relevant if the application is compiled with libFuzzer.") - ("client_logs_file", po::value(), "Path to a file for writing client logs. Currently we only have fatal logs (when the client crashes)") ; @@ -339,6 +339,41 @@ void ClientApplicationBase::init(int argc, char ** argv) if (options.count("server_logs_file")) server_logs_file = options["server_logs_file"].as(); + if (options.count("proto_caps")) + { + std::string proto_caps_str = options["proto_caps"].as(); + + std::vector proto_caps; + splitInto<','>(proto_caps, proto_caps_str); + + for (auto cap_str : proto_caps) + { + std::string direction; + + if (cap_str.starts_with("send_")) + { + direction = "send"; + cap_str = cap_str.substr(std::string_view("send_").size()); + } + else if (cap_str.starts_with("recv_")) + { + direction = "recv"; + cap_str = cap_str.substr(std::string_view("recv_").size()); + } + + if (cap_str != "chunked" && cap_str != "notchunked" && cap_str != "chunked_optional" && cap_str != "notchunked_optional") + throw Exception(ErrorCodes::BAD_ARGUMENTS, "proto_caps option is incorrect ({})", proto_caps_str); + + if (direction.empty()) + { + config().setString("proto_caps.send", std::string(cap_str)); + config().setString("proto_caps.recv", std::string(cap_str)); + } + else + config().setString("proto_caps." + direction, std::string(cap_str)); + } + } + query_processing_stage = QueryProcessingStage::fromString(options["stage"].as()); query_kind = parseQueryKind(options["query_kind"].as()); profile_events.print = options.count("print-profile-events"); diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 5642a72608c..01d03006eec 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -73,9 +73,11 @@ #include #include #include +#include #include #include +#include #include "config.h" #include #include @@ -329,7 +331,11 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, const Setting { output_stream << std::endl; WriteBufferFromOStream res_buf(output_stream, 4096); - formatAST(*res, res_buf); + IAST::FormatSettings format_settings(res_buf, /* one_line */ false); + format_settings.hilite = true; + format_settings.show_secrets = true; + format_settings.print_pretty_type_names = true; + res->format(format_settings); res_buf.finalize(); output_stream << std::endl << std::endl; } @@ -477,6 +483,8 @@ void ClientBase::onProfileInfo(const ProfileInfo & profile_info) { if (profile_info.hasAppliedLimit() && output_format) output_format->setRowsBeforeLimit(profile_info.getRowsBeforeLimit()); + if (profile_info.hasAppliedAggregation() && output_format) + output_format->setRowsBeforeAggregation(profile_info.getRowsBeforeAggregation()); } @@ -912,6 +920,8 @@ void ClientBase::processTextAsSingleQuery(const String & full_query) } catch (Exception & e) { + if (server_exception) + server_exception->rethrow(); if (!is_interactive) e.addMessage("(in query: {})", full_query); throw; @@ -1030,19 +1040,28 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa query_interrupt_handler.start(signals_before_stop); SCOPE_EXIT({ query_interrupt_handler.stop(); }); - connection->sendQuery( - connection_parameters.timeouts, - query, - query_parameters, - client_context->getCurrentQueryId(), - query_processing_stage, - &client_context->getSettingsRef(), - &client_context->getClientInfo(), - true, - [&](const Progress & progress) { onProgress(progress); }); + try { + connection->sendQuery( + connection_parameters.timeouts, + query, + query_parameters, + client_context->getCurrentQueryId(), + query_processing_stage, + &client_context->getSettingsRef(), + &client_context->getClientInfo(), + true, + [&](const Progress & progress) { onProgress(progress); }); + + if (send_external_tables) + sendExternalTables(parsed_query); + } + catch (const NetException &) + { + // We still want to attempt to process whatever we already received or can receive (socket receive buffer can be not empty) + receiveResult(parsed_query, signals_before_stop, settings.partial_result_on_first_cancel); + throw; + } - if (send_external_tables) - sendExternalTables(parsed_query); receiveResult(parsed_query, signals_before_stop, settings.partial_result_on_first_cancel); break; @@ -2535,6 +2554,7 @@ void ClientBase::runInteractive() *suggest, history_file, getClientConfiguration().has("multiline"), + getClientConfiguration().getBool("ignore_shell_suspend", true), query_extenders, query_delimiters, word_break_characters, @@ -2749,7 +2769,7 @@ void ClientBase::runLibFuzzer() for (auto & arg : fuzzer_args_holder) fuzzer_args.emplace_back(arg.data()); - int fuzzer_argc = fuzzer_args.size(); + int fuzzer_argc = static_cast(fuzzer_args.size()); char ** fuzzer_argv = fuzzer_args.data(); LLVMFuzzerRunDriver(&fuzzer_argc, &fuzzer_argv, [](const uint8_t * data, size_t size) diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 1a23b6b1363..45251aea28a 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -2,7 +2,7 @@ #include -#include +#include #include #include #include diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 799c7511982..da6e5baa3ad 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -5,8 +5,6 @@ #include #include #include -#include -#include #include #include #include @@ -85,6 +83,7 @@ Connection::~Connection() Connection::Connection(const String & host_, UInt16 port_, const String & default_database_, const String & user_, const String & password_, + const String & proto_send_chunked_, const String & proto_recv_chunked_, [[maybe_unused]] const SSHKey & ssh_private_key_, const String & jwt_, const String & quota_key_, @@ -95,6 +94,7 @@ Connection::Connection(const String & host_, UInt16 port_, Protocol::Secure secure_) : host(host_), port(port_), default_database(default_database_) , user(user_), password(password_) + , proto_send_chunked(proto_send_chunked_), proto_recv_chunked(proto_recv_chunked_) #if USE_SSH , ssh_private_key(ssh_private_key_) #endif @@ -145,6 +145,9 @@ void Connection::connect(const ConnectionTimeouts & timeouts) /// work we need to pass host name separately. It will be send into TLS Hello packet to let /// the server know which host we want to talk with (single IP can process requests for multiple hosts using SNI). static_cast(socket.get())->setPeerHostName(host); + /// we want to postpone SSL handshake until first read or write operation + /// so any errors during negotiation would be properly processed + static_cast(socket.get())->setLazyHandshake(true); #else throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "tcp_secure protocol is disabled because poco library was built without NetSSL support."); #endif @@ -211,10 +214,10 @@ void Connection::connect(const ConnectionTimeouts & timeouts) , tcp_keep_alive_timeout_in_sec); } - in = std::make_shared(*socket); + in = std::make_shared(*socket); in->setAsyncCallback(async_callback); - out = std::make_shared(*socket); + out = std::make_shared(*socket); out->setAsyncCallback(async_callback); connected = true; setDescription(); @@ -222,9 +225,61 @@ void Connection::connect(const ConnectionTimeouts & timeouts) sendHello(); receiveHello(timeouts.handshake_timeout); + if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS) + { + /// Client side of chunked protocol negotiation. + /// Server advertises its protocol capabilities (separate for send and receive channels) by sending + /// in its 'Hello' response one of four types - chunked, notchunked, chunked_optional, notchunked_optional. + /// Not optional types are strict meaning that server only supports this type, optional means that + /// server prefer this type but capable to work in opposite. + /// Client selects which type it is going to communicate based on the settings from config or arguments, + /// and sends either "chunked" or "notchunked" protocol request in addendum section of handshake. + /// Client can detect if server's protocol capabilities are not compatible with client's settings (for example + /// server strictly requires chunked protocol but client's settings only allows notchunked protocol) - in such case + /// client should interrupt this connection. However if client continues with incompatible protocol type request, server + /// will send appropriate exception and disconnect client. + + auto is_chunked = [](const String & chunked_srv_str, const String & chunked_cl_str, const String & direction) + { + bool chunked_srv = chunked_srv_str.starts_with("chunked"); + bool optional_srv = chunked_srv_str.ends_with("_optional"); + bool chunked_cl = chunked_cl_str.starts_with("chunked"); + bool optional_cl = chunked_cl_str.ends_with("_optional"); + + if (optional_srv) + return chunked_cl; + if (optional_cl) + return chunked_srv; + if (chunked_cl != chunked_srv) + throw NetException( + ErrorCodes::NETWORK_ERROR, + "Incompatible protocol: {} set to {}, server requires {}", + direction, + chunked_cl ? "chunked" : "notchunked", + chunked_srv ? "chunked" : "notchunked"); + + return chunked_srv; + }; + + proto_send_chunked = is_chunked(proto_recv_chunked_srv, proto_send_chunked, "send") ? "chunked" : "notchunked"; + proto_recv_chunked = is_chunked(proto_send_chunked_srv, proto_recv_chunked, "recv") ? "chunked" : "notchunked"; + } + else + { + if (proto_send_chunked == "chunked" || proto_recv_chunked == "chunked") + throw NetException( + ErrorCodes::NETWORK_ERROR, + "Incompatible protocol: server's version is too old and doesn't support chunked protocol while client settings require it."); + } + if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_ADDENDUM) sendAddendum(); + if (proto_send_chunked == "chunked") + out->enableChunked(); + if (proto_recv_chunked == "chunked") + in->enableChunked(); + LOG_TRACE(log_wrapper.get(), "Connected to {} server version {}.{}.{}.", server_name, server_version_major, server_version_minor, server_version_patch); } @@ -393,6 +448,13 @@ void Connection::sendAddendum() { if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_QUOTA_KEY) writeStringBinary(quota_key, *out); + + if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS) + { + writeStringBinary(proto_send_chunked, *out); + writeStringBinary(proto_recv_chunked, *out); + } + out->next(); } @@ -472,6 +534,12 @@ void Connection::receiveHello(const Poco::Timespan & handshake_timeout) else server_version_patch = server_revision; + if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS) + { + readStringBinary(proto_send_chunked_srv, *in); + readStringBinary(proto_recv_chunked_srv, *in); + } + if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_PASSWORD_COMPLEXITY_RULES) { UInt64 rules_size; @@ -611,6 +679,7 @@ bool Connection::ping(const ConnectionTimeouts & timeouts) UInt64 pong = 0; writeVarUInt(Protocol::Client::Ping, *out); + out->finishChunk(); out->next(); if (in->eof()) @@ -660,6 +729,7 @@ TablesStatusResponse Connection::getTablesStatus(const ConnectionTimeouts & time writeVarUInt(Protocol::Client::TablesStatusRequest, *out); request.write(*out, server_revision); + out->finishChunk(); out->next(); UInt64 response_type = 0; @@ -813,6 +883,8 @@ void Connection::sendQuery( block_profile_events_in.reset(); block_out.reset(); + out->finishChunk(); + /// Send empty block which means end of data. if (!with_pending_data) { @@ -829,6 +901,7 @@ void Connection::sendCancel() return; writeVarUInt(Protocol::Client::Cancel, *out); + out->finishChunk(); out->next(); } @@ -854,7 +927,10 @@ void Connection::sendData(const Block & block, const String & name, bool scalar) size_t prev_bytes = out->count(); block_out->write(block); - maybe_compressed_out->next(); + if (maybe_compressed_out != out) + maybe_compressed_out->next(); + if (!block) + out->finishChunk(); out->next(); if (throttler) @@ -865,6 +941,7 @@ void Connection::sendIgnoredPartUUIDs(const std::vector & uuids) { writeVarUInt(Protocol::Client::IgnoredPartUUIDs, *out); writeVectorBinary(uuids, *out); + out->finishChunk(); out->next(); } @@ -874,6 +951,7 @@ void Connection::sendReadTaskResponse(const String & response) writeVarUInt(Protocol::Client::ReadTaskResponse, *out); writeVarUInt(DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION, *out); writeStringBinary(response, *out); + out->finishChunk(); out->next(); } @@ -882,6 +960,7 @@ void Connection::sendMergeTreeReadTaskResponse(const ParallelReadResponse & resp { writeVarUInt(Protocol::Client::MergeTreeReadTaskResponse, *out); response.serialize(*out); + out->finishChunk(); out->next(); } @@ -899,6 +978,8 @@ void Connection::sendPreparedData(ReadBuffer & input, size_t size, const String copyData(input, *out); else copyData(input, *out, size); + + out->finishChunk(); out->next(); } @@ -927,6 +1008,8 @@ void Connection::sendScalarsData(Scalars & data) sendData(elem.second, elem.first, true /* scalar */); } + out->finishChunk(); + out_bytes = out->count() - out_bytes; maybe_compressed_out_bytes = maybe_compressed_out->count() - maybe_compressed_out_bytes; double elapsed = watch.elapsedSeconds(); @@ -1069,13 +1152,13 @@ std::optional Connection::getResolvedAddress() const bool Connection::poll(size_t timeout_microseconds) { - return static_cast(*in).poll(timeout_microseconds); + return in->poll(timeout_microseconds); } bool Connection::hasReadPendingData() const { - return last_input_packet_type.has_value() || static_cast(*in).hasPendingData(); + return last_input_packet_type.has_value() || in->hasBufferedData(); } @@ -1319,7 +1402,7 @@ Progress Connection::receiveProgress() const ProfileInfo Connection::receiveProfileInfo() const { ProfileInfo profile_info; - profile_info.read(*in); + profile_info.read(*in, server_revision); return profile_info; } @@ -1349,6 +1432,8 @@ ServerConnectionPtr Connection::createConnection(const ConnectionParameters & pa parameters.default_database, parameters.user, parameters.password, + parameters.proto_send_chunked, + parameters.proto_recv_chunked, parameters.ssh_private_key, parameters.jwt, parameters.quota_key, diff --git a/src/Client/Connection.h b/src/Client/Connection.h index 0f4b3e436df..ed84bc51318 100644 --- a/src/Client/Connection.h +++ b/src/Client/Connection.h @@ -8,8 +8,8 @@ #include -#include -#include +#include +#include #include #include @@ -52,6 +52,7 @@ public: Connection(const String & host_, UInt16 port_, const String & default_database_, const String & user_, const String & password_, + const String & proto_send_chunked_, const String & proto_recv_chunked_, const SSHKey & ssh_private_key_, const String & jwt_, const String & quota_key_, @@ -170,6 +171,10 @@ private: String default_database; String user; String password; + String proto_send_chunked; + String proto_recv_chunked; + String proto_send_chunked_srv; + String proto_recv_chunked_srv; #if USE_SSH SSHKey ssh_private_key; #endif @@ -209,8 +214,8 @@ private: String server_display_name; std::unique_ptr socket; - std::shared_ptr in; - std::shared_ptr out; + std::shared_ptr in; + std::shared_ptr out; std::optional last_input_packet_type; String query_id; diff --git a/src/Client/ConnectionParameters.cpp b/src/Client/ConnectionParameters.cpp index 303bebc30d2..4d0a9ffa08c 100644 --- a/src/Client/ConnectionParameters.cpp +++ b/src/Client/ConnectionParameters.cpp @@ -107,6 +107,9 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati } } + proto_send_chunked = config.getString("proto_caps.send", "notchunked"); + proto_recv_chunked = config.getString("proto_caps.recv", "notchunked"); + quota_key = config.getString("quota_key", ""); /// By default compression is disabled if address looks like localhost. diff --git a/src/Client/ConnectionParameters.h b/src/Client/ConnectionParameters.h index c305c7813f2..382bfe34a3d 100644 --- a/src/Client/ConnectionParameters.h +++ b/src/Client/ConnectionParameters.h @@ -20,6 +20,8 @@ struct ConnectionParameters std::string default_database; std::string user; std::string password; + std::string proto_send_chunked = "notchunked"; + std::string proto_recv_chunked = "notchunked"; std::string quota_key; SSHKey ssh_private_key; std::string jwt; diff --git a/src/Client/ConnectionPool.cpp b/src/Client/ConnectionPool.cpp index ed2e7c3c725..ab8ad08826c 100644 --- a/src/Client/ConnectionPool.cpp +++ b/src/Client/ConnectionPool.cpp @@ -13,6 +13,8 @@ ConnectionPoolPtr ConnectionPoolFactory::get( String default_database, String user, String password, + String proto_send_chunked, + String proto_recv_chunked, String quota_key, String cluster, String cluster_secret, @@ -22,7 +24,7 @@ ConnectionPoolPtr ConnectionPoolFactory::get( Priority priority) { Key key{ - max_connections, host, port, default_database, user, password, quota_key, cluster, cluster_secret, client_name, compression, secure, priority}; + max_connections, host, port, default_database, user, password, proto_send_chunked, proto_recv_chunked, quota_key, cluster, cluster_secret, client_name, compression, secure, priority}; std::lock_guard lock(mutex); auto [it, inserted] = pools.emplace(key, ConnectionPoolPtr{}); @@ -39,6 +41,8 @@ ConnectionPoolPtr ConnectionPoolFactory::get( default_database, user, password, + proto_send_chunked, + proto_recv_chunked, quota_key, cluster, cluster_secret, diff --git a/src/Client/ConnectionPool.h b/src/Client/ConnectionPool.h index 0fcb3c4e7e1..219548b62a0 100644 --- a/src/Client/ConnectionPool.h +++ b/src/Client/ConnectionPool.h @@ -73,6 +73,8 @@ public: const String & default_database_, const String & user_, const String & password_, + const String & proto_send_chunked_, + const String & proto_recv_chunked_, const String & quota_key_, const String & cluster_, const String & cluster_secret_, @@ -85,6 +87,8 @@ public: , default_database(default_database_) , user(user_) , password(password_) + , proto_send_chunked(proto_send_chunked_) + , proto_recv_chunked(proto_recv_chunked_) , quota_key(quota_key_) , cluster(cluster_) , cluster_secret(cluster_secret_) @@ -116,7 +120,9 @@ protected: { return std::make_shared( host, port, - default_database, user, password, SSHKey(), /*jwt*/ "", quota_key, + default_database, user, password, + proto_send_chunked, proto_recv_chunked, + SSHKey(), /*jwt*/ "", quota_key, cluster, cluster_secret, client_name, compression, secure); } @@ -125,6 +131,8 @@ private: String default_database; String user; String password; + String proto_send_chunked; + String proto_recv_chunked; String quota_key; /// For inter-server authorization @@ -150,6 +158,8 @@ public: String default_database; String user; String password; + String proto_send_chunked; + String proto_recv_chunked; String quota_key; String cluster; String cluster_secret; @@ -173,6 +183,8 @@ public: String default_database, String user, String password, + String proto_send_chunked, + String proto_recv_chunked, String quota_key, String cluster, String cluster_secret, @@ -190,6 +202,7 @@ inline bool operator==(const ConnectionPoolFactory::Key & lhs, const ConnectionP { return lhs.max_connections == rhs.max_connections && lhs.host == rhs.host && lhs.port == rhs.port && lhs.default_database == rhs.default_database && lhs.user == rhs.user && lhs.password == rhs.password + && lhs.proto_send_chunked == rhs.proto_send_chunked && lhs.proto_recv_chunked == rhs.proto_recv_chunked && lhs.quota_key == rhs.quota_key && lhs.cluster == rhs.cluster && lhs.cluster_secret == rhs.cluster_secret && lhs.client_name == rhs.client_name && lhs.compression == rhs.compression && lhs.secure == rhs.secure && lhs.priority == rhs.priority; diff --git a/src/Client/LocalConnection.cpp b/src/Client/LocalConnection.cpp index 072184e0a66..7595a29912b 100644 --- a/src/Client/LocalConnection.cpp +++ b/src/Client/LocalConnection.cpp @@ -365,7 +365,7 @@ bool LocalConnection::poll(size_t) { while (pollImpl()) { - LOG_DEBUG(&Poco::Logger::get("LocalConnection"), "Executor timeout encountered, will retry"); + LOG_TEST(&Poco::Logger::get("LocalConnection"), "Executor timeout encountered, will retry"); if (needSendProgressOrMetrics()) return true; diff --git a/src/Client/ReplxxLineReader.cpp b/src/Client/ReplxxLineReader.cpp index 3b3508d1a58..37ceb471e5b 100644 --- a/src/Client/ReplxxLineReader.cpp +++ b/src/Client/ReplxxLineReader.cpp @@ -294,17 +294,19 @@ ReplxxLineReader::ReplxxLineReader( Suggest & suggest, const String & history_file_path_, bool multiline_, + bool ignore_shell_suspend, Patterns extenders_, Patterns delimiters_, const char word_break_characters_[], replxx::Replxx::highlighter_callback_t highlighter_, - [[ maybe_unused ]] std::istream & input_stream_, - [[ maybe_unused ]] std::ostream & output_stream_, - [[ maybe_unused ]] int in_fd_, - [[ maybe_unused ]] int out_fd_, - [[ maybe_unused ]] int err_fd_ + std::istream & input_stream_, + std::ostream & output_stream_, + int in_fd_, + int out_fd_, + int err_fd_ ) : LineReader(history_file_path_, multiline_, std::move(extenders_), std::move(delimiters_), input_stream_, output_stream_, in_fd_) + , rx(input_stream_, output_stream_, in_fd_, out_fd_, err_fd_) , highlighter(std::move(highlighter_)) , word_break_characters(word_break_characters_) , editor(getEditor()) @@ -363,7 +365,8 @@ ReplxxLineReader::ReplxxLineReader( rx.bind_key(Replxx::KEY::control('P'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::HISTORY_PREVIOUS, code); }); /// We don't want the default, "suspend" behavior, it confuses people. - rx.bind_key_internal(replxx::Replxx::KEY::control('Z'), "insert_character"); + if (ignore_shell_suspend) + rx.bind_key_internal(replxx::Replxx::KEY::control('Z'), "insert_character"); auto commit_action = [this](char32_t code) { @@ -514,7 +517,7 @@ void ReplxxLineReader::addToHistory(const String & line) rx.history_add(line); // flush changes to the disk - if (!rx.history_save(history_file_path)) + if (history_file_fd >= 0 && !rx.history_save(history_file_path)) rx.print("Saving history failed: %s\n", errnoToString().c_str()); if (history_file_fd >= 0 && locked && 0 != flock(history_file_fd, LOCK_UN)) diff --git a/src/Client/ReplxxLineReader.h b/src/Client/ReplxxLineReader.h index c46080420ef..1dbad2c70dd 100644 --- a/src/Client/ReplxxLineReader.h +++ b/src/Client/ReplxxLineReader.h @@ -15,6 +15,7 @@ public: Suggest & suggest, const String & history_file_path, bool multiline, + bool ignore_shell_suspend, Patterns extenders_, Patterns delimiters_, const char word_break_characters_[], diff --git a/src/Client/Suggest.cpp b/src/Client/Suggest.cpp index 0188ebc8173..affd620f83a 100644 --- a/src/Client/Suggest.cpp +++ b/src/Client/Suggest.cpp @@ -214,7 +214,7 @@ void Suggest::fillWordsFromBlock(const Block & block) Words new_words; new_words.reserve(rows); for (size_t i = 0; i < rows; ++i) - new_words.emplace_back(column[i].get()); + new_words.emplace_back(column[i].safeGet()); addWords(std::move(new_words)); } diff --git a/src/Columns/ColumnAggregateFunction.cpp b/src/Columns/ColumnAggregateFunction.cpp index 4bc48c62eb4..d3363d91a46 100644 --- a/src/Columns/ColumnAggregateFunction.cpp +++ b/src/Columns/ColumnAggregateFunction.cpp @@ -457,9 +457,9 @@ MutableColumnPtr ColumnAggregateFunction::cloneEmpty() const Field ColumnAggregateFunction::operator[](size_t n) const { Field field = AggregateFunctionStateData(); - field.get().name = type_string; + field.safeGet().name = type_string; { - WriteBufferFromString buffer(field.get().data); + WriteBufferFromString buffer(field.safeGet().data); func->serialize(data[n], buffer, version); } return field; @@ -467,12 +467,7 @@ Field ColumnAggregateFunction::operator[](size_t n) const void ColumnAggregateFunction::get(size_t n, Field & res) const { - res = AggregateFunctionStateData(); - res.get().name = type_string; - { - WriteBufferFromString buffer(res.get().data); - func->serialize(data[n], buffer, version); - } + res = operator[](n); } StringRef ColumnAggregateFunction::getDataAt(size_t n) const @@ -552,7 +547,7 @@ void ColumnAggregateFunction::insert(const Field & x) "Inserting field of type {} into ColumnAggregateFunction. Expected {}", x.getTypeName(), Field::Types::AggregateFunctionState); - const auto & field_name = x.get().name; + const auto & field_name = x.safeGet().name; if (type_string != field_name) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot insert filed with type {} into column with type {}", field_name, type_string); @@ -560,7 +555,7 @@ void ColumnAggregateFunction::insert(const Field & x) ensureOwnership(); Arena & arena = createOrGetArena(); pushBackAndCreateState(data, arena, func.get()); - ReadBufferFromString read_buffer(x.get().data); + ReadBufferFromString read_buffer(x.safeGet().data); func->deserialize(data.back(), read_buffer, version, &arena); } @@ -569,14 +564,14 @@ bool ColumnAggregateFunction::tryInsert(const DB::Field & x) if (x.getType() != Field::Types::AggregateFunctionState) return false; - const auto & field_name = x.get().name; + const auto & field_name = x.safeGet().name; if (type_string != field_name) return false; ensureOwnership(); Arena & arena = createOrGetArena(); pushBackAndCreateState(data, arena, func.get()); - ReadBufferFromString read_buffer(x.get().data); + ReadBufferFromString read_buffer(x.safeGet().data); func->deserialize(data.back(), read_buffer, version, &arena); return true; } diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index 19cce678cc7..83d4c24c769 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -141,7 +141,7 @@ void ColumnArray::get(size_t n, Field & res) const size, max_array_size_as_field); res = Array(); - Array & res_arr = res.get(); + Array & res_arr = res.safeGet(); res_arr.reserve(size); for (size_t i = 0; i < size; ++i) @@ -309,7 +309,7 @@ void ColumnArray::updateHashFast(SipHash & hash) const void ColumnArray::insert(const Field & x) { - const Array & array = x.get(); + const Array & array = x.safeGet(); size_t size = array.size(); for (size_t i = 0; i < size; ++i) getData().insert(array[i]); @@ -321,7 +321,7 @@ bool ColumnArray::tryInsert(const Field & x) if (x.getType() != Field::Types::Which::Array) return false; - const Array & array = x.get(); + const Array & array = x.safeGet(); size_t size = array.size(); for (size_t i = 0; i < size; ++i) { @@ -452,6 +452,27 @@ void ColumnArray::reserve(size_t n) getData().reserve(n); /// The average size of arrays is not taken into account here. Or it is considered to be no more than 1. } +size_t ColumnArray::capacity() const +{ + return getOffsets().capacity(); +} + +void ColumnArray::prepareForSquashing(const Columns & source_columns) +{ + size_t new_size = size(); + Columns source_data_columns; + source_data_columns.reserve(source_columns.size()); + for (const auto & source_column : source_columns) + { + const auto & source_array_column = assert_cast(*source_column); + new_size += source_array_column.size(); + source_data_columns.push_back(source_array_column.getDataPtr()); + } + + getOffsets().reserve_exact(new_size); + data->prepareForSquashing(source_data_columns); +} + void ColumnArray::shrinkToFit() { getOffsets().shrink_to_fit(); diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h index 63affb86d9d..f77268a8be6 100644 --- a/src/Columns/ColumnArray.h +++ b/src/Columns/ColumnArray.h @@ -118,6 +118,8 @@ public: void updatePermutationWithCollation(const Collator & collator, PermutationSortDirection direction, PermutationSortStability stability, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges& equal_ranges) const override; void reserve(size_t n) override; + size_t capacity() const override; + void prepareForSquashing(const Columns & source_columns) override; void shrinkToFit() override; void ensureOwnership() override; size_t byteSize() const override; diff --git a/src/Columns/ColumnDecimal.h b/src/Columns/ColumnDecimal.h index 59bfbd2159c..6f8360a54dd 100644 --- a/src/Columns/ColumnDecimal.h +++ b/src/Columns/ColumnDecimal.h @@ -53,6 +53,7 @@ public: size_t allocatedBytes() const override { return data.allocated_bytes(); } void protect() override { data.protect(); } void reserve(size_t n) override { data.reserve_exact(n); } + size_t capacity() const override { return data.capacity(); } void shrinkToFit() override { data.shrink_to_fit(); } #if !defined(DEBUG_OR_SANITIZER_BUILD) @@ -74,7 +75,7 @@ public: void insertData(const char * src, size_t /*length*/) override; void insertDefault() override { data.push_back(T()); } void insertManyDefaults(size_t length) override { data.resize_fill(data.size() + length); } - void insert(const Field & x) override { data.push_back(x.get()); } + void insert(const Field & x) override { data.push_back(x.safeGet()); } bool tryInsert(const Field & x) override; #if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp index a92d54dd675..ef6cd7dcea2 100644 --- a/src/Columns/ColumnDynamic.cpp +++ b/src/Columns/ColumnDynamic.cpp @@ -1,17 +1,21 @@ #include #include +#include #include #include #include +#include #include #include #include #include #include #include -#include -#include +#include +#include +#include +#include namespace DB { @@ -22,31 +26,80 @@ namespace ErrorCodes extern const int PARAMETER_OUT_OF_BOUND; } - -ColumnDynamic::ColumnDynamic(size_t max_dynamic_types_) : max_dynamic_types(max_dynamic_types_) +namespace { - /// Create empty Variant. - variant_info.variant_type = std::make_shared(DataTypes{}); - variant_info.variant_name = variant_info.variant_type->getName(); - variant_column = variant_info.variant_type->createColumn(); + +/// Static default format settings to avoid creating it every time. +const FormatSettings & getFormatSettings() +{ + static const FormatSettings settings; + return settings; +} + +} + +/// Shared variant will contain String values but we cannot use usual String type +/// because we can have regular variant with type String. +/// To solve it, we use String type with custom name for shared variant. +DataTypePtr ColumnDynamic::getSharedVariantDataType() +{ + return DataTypeFactory::instance().getCustom("String", std::make_unique(std::make_unique(getSharedVariantTypeName()))); +} + +ColumnDynamic::ColumnDynamic(size_t max_dynamic_types_) : max_dynamic_types(max_dynamic_types_), global_max_dynamic_types(max_dynamic_types) +{ + /// Create Variant with shared variant. + setVariantType(std::make_shared(DataTypes{getSharedVariantDataType()})); } ColumnDynamic::ColumnDynamic( - MutableColumnPtr variant_column_, const VariantInfo & variant_info_, size_t max_dynamic_types_, const Statistics & statistics_) + MutableColumnPtr variant_column_, const DataTypePtr & variant_type_, size_t max_dynamic_types_, size_t global_max_dynamic_types_, const StatisticsPtr & statistics_) : variant_column(std::move(variant_column_)) + , variant_column_ptr(assert_cast(variant_column.get())) + , max_dynamic_types(max_dynamic_types_) + , global_max_dynamic_types(global_max_dynamic_types_) + , statistics(statistics_) +{ + createVariantInfo(variant_type_); +} + +ColumnDynamic::ColumnDynamic( + MutableColumnPtr variant_column_, const VariantInfo & variant_info_, size_t max_dynamic_types_, size_t global_max_dynamic_types_, const StatisticsPtr & statistics_) + : variant_column(std::move(variant_column_)) + , variant_column_ptr(assert_cast(variant_column.get())) , variant_info(variant_info_) , max_dynamic_types(max_dynamic_types_) + , global_max_dynamic_types(global_max_dynamic_types_) , statistics(statistics_) { } -ColumnDynamic::MutablePtr ColumnDynamic::create(MutableColumnPtr variant_column, const DataTypePtr & variant_type, size_t max_dynamic_types_, const Statistics & statistics_) +void ColumnDynamic::setVariantType(const DataTypePtr & variant_type) +{ + if (variant_column && !empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Setting specific variant type is allowed only for empty dynamic column"); + + variant_column = variant_type->createColumn(); + variant_column_ptr = assert_cast(variant_column.get()); + createVariantInfo(variant_type); +} + +void ColumnDynamic::setMaxDynamicPaths(size_t max_dynamic_type_) +{ + if (variant_column && !empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Setting specific max_dynamic_type parameter is allowed only for empty dynamic column"); + + max_dynamic_types = max_dynamic_type_; +} + +void ColumnDynamic::createVariantInfo(const DataTypePtr & variant_type) { - VariantInfo variant_info; variant_info.variant_type = variant_type; variant_info.variant_name = variant_type->getName(); const auto & variants = assert_cast(*variant_type).getVariants(); + variant_info.variant_names.clear(); variant_info.variant_names.reserve(variants.size()); + variant_info.variant_name_to_discriminator.clear(); variant_info.variant_name_to_discriminator.reserve(variants.size()); for (ColumnVariant::Discriminator discr = 0; discr != variants.size(); ++discr) { @@ -54,30 +107,26 @@ ColumnDynamic::MutablePtr ColumnDynamic::create(MutableColumnPtr variant_column, variant_info.variant_name_to_discriminator[variant_name] = discr; } - return create(std::move(variant_column), variant_info, max_dynamic_types_, statistics_); + if (!variant_info.variant_name_to_discriminator.contains(getSharedVariantTypeName())) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Variant in Dynamic column doesn't contain shared variant"); } -bool ColumnDynamic::addNewVariant(const DB::DataTypePtr & new_variant) +bool ColumnDynamic::addNewVariant(const DataTypePtr & new_variant, const String & new_variant_name) { /// Check if we already have such variant. - if (variant_info.variant_name_to_discriminator.contains(new_variant->getName())) + if (variant_info.variant_name_to_discriminator.contains(new_variant_name)) return true; /// Check if we reached maximum number of variants. - if (variant_info.variant_names.size() >= max_dynamic_types) + if (!canAddNewVariant()) { - /// ColumnDynamic can have max_dynamic_types number of variants only when it has String as a variant. - /// Otherwise we won't be able to cast new variants to Strings. - if (!variant_info.variant_name_to_discriminator.contains("String")) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Maximum number of variants reached, but no String variant exists"); + /// Dynamic column should always have shared variant. + if (!variant_info.variant_name_to_discriminator.contains(getSharedVariantTypeName())) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Maximum number of variants reached, but no shared variant exists"); return false; } - /// If we have (max_dynamic_types - 1) number of variants and don't have String variant, we can add only String variant. - if (variant_info.variant_names.size() == max_dynamic_types - 1 && new_variant->getName() != "String" && !variant_info.variant_name_to_discriminator.contains("String")) - return false; - const DataTypes & current_variants = assert_cast(*variant_info.variant_type).getVariants(); DataTypes all_variants = current_variants; all_variants.push_back(new_variant); @@ -86,21 +135,15 @@ bool ColumnDynamic::addNewVariant(const DB::DataTypePtr & new_variant) return true; } -void ColumnDynamic::addStringVariant() +void extendVariantColumn( + IColumn & variant_column, + const DataTypePtr & old_variant_type, + const DataTypePtr & new_variant_type, + std::unordered_map old_variant_name_to_discriminator) { - if (!addNewVariant(std::make_shared())) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add String variant to Dynamic column, it's a bug"); -} - -void ColumnDynamic::updateVariantInfoAndExpandVariantColumn(const DB::DataTypePtr & new_variant_type) -{ - const DataTypes & current_variants = assert_cast(variant_info.variant_type.get())->getVariants(); + const DataTypes & current_variants = assert_cast(old_variant_type.get())->getVariants(); const DataTypes & new_variants = assert_cast(new_variant_type.get())->getVariants(); - Names new_variant_names; - new_variant_names.reserve(new_variants.size()); - std::unordered_map new_variant_name_to_discriminator; - new_variant_name_to_discriminator.reserve(new_variants.size()); std::vector> new_variant_columns_and_discriminators_to_add; new_variant_columns_and_discriminators_to_add.reserve(new_variants.size() - current_variants.size()); std::vector current_to_new_discriminators; @@ -108,26 +151,26 @@ void ColumnDynamic::updateVariantInfoAndExpandVariantColumn(const DB::DataTypePt for (ColumnVariant::Discriminator discr = 0; discr != new_variants.size(); ++discr) { - const auto & name = new_variant_names.emplace_back(new_variants[discr]->getName()); - new_variant_name_to_discriminator[name] = discr; - - auto current_it = variant_info.variant_name_to_discriminator.find(name); - if (current_it == variant_info.variant_name_to_discriminator.end()) + auto current_it = old_variant_name_to_discriminator.find(new_variants[discr]->getName()); + if (current_it == old_variant_name_to_discriminator.end()) new_variant_columns_and_discriminators_to_add.emplace_back(new_variants[discr]->createColumn(), discr); else current_to_new_discriminators[current_it->second] = discr; } - variant_info.variant_type = new_variant_type; - variant_info.variant_name = new_variant_type->getName(); - variant_info.variant_names = new_variant_names; - variant_info.variant_name_to_discriminator = new_variant_name_to_discriminator; - assert_cast(*variant_column).extend(current_to_new_discriminators, std::move(new_variant_columns_and_discriminators_to_add)); + assert_cast(variant_column).extend(current_to_new_discriminators, std::move(new_variant_columns_and_discriminators_to_add)); +} + +void ColumnDynamic::updateVariantInfoAndExpandVariantColumn(const DataTypePtr & new_variant_type) +{ + extendVariantColumn(*variant_column, variant_info.variant_type, new_variant_type, variant_info.variant_name_to_discriminator); + createVariantInfo(new_variant_type); + /// Clear mappings cache because now with new Variant we will have new mappings. variant_mappings_cache.clear(); } -std::vector * ColumnDynamic::combineVariants(const DB::ColumnDynamic::VariantInfo & other_variant_info) +std::vector * ColumnDynamic::combineVariants(const ColumnDynamic::VariantInfo & other_variant_info) { /// Check if we already have global discriminators mapping for other Variant in cache. /// It's used to not calculate the same mapping each call of insertFrom with the same columns. @@ -154,20 +197,13 @@ std::vector * ColumnDynamic::combineVariants(const const DataTypes & current_variants = assert_cast(*variant_info.variant_type).getVariants(); /// We cannot combine Variants if total number of variants exceeds max_dynamic_types. - if (current_variants.size() + num_new_variants > max_dynamic_types) + if (!canAddNewVariants(num_new_variants)) { /// Remember that we cannot combine our variant with this one, so we will not try to do it again. variants_with_failed_combination.insert(other_variant_info.variant_name); return nullptr; } - /// We cannot combine Variants if total number of variants reaches max_dynamic_types and we don't have String variant. - if (current_variants.size() + num_new_variants == max_dynamic_types && !variant_info.variant_name_to_discriminator.contains("String") && !other_variant_info.variant_name_to_discriminator.contains("String")) - { - variants_with_failed_combination.insert(other_variant_info.variant_name); - return nullptr; - } - DataTypes all_variants = current_variants; all_variants.insert(all_variants.end(), other_variants.begin(), other_variants.end()); auto new_variant_type = std::make_shared(all_variants); @@ -185,40 +221,93 @@ std::vector * ColumnDynamic::combineVariants(const return &it->second; } -void ColumnDynamic::insert(const DB::Field & x) +void ColumnDynamic::insert(const Field & x) { - /// Check if we can insert field without Variant extension. - if (variant_column->tryInsert(x)) + if (x.isNull()) + { + insertDefault(); return; + } + + auto & variant_col = getVariantColumn(); + auto shared_variant_discr = getSharedVariantDiscriminator(); + /// Check if we can insert field into existing variants and avoid Variant extension. + for (size_t i = 0; i != variant_col.getNumVariants(); ++i) + { + if (i != shared_variant_discr && variant_col.getVariantByGlobalDiscriminator(i).tryInsert(x)) + { + variant_col.getLocalDiscriminators().push_back(variant_col.localDiscriminatorByGlobal(i)); + variant_col.getOffsets().push_back(variant_col.getVariantByGlobalDiscriminator(i).size() - 1); + return; + } + } /// If we cannot insert field into current variant column, extend it with new variant for this field from its type. - if (addNewVariant(applyVisitor(FieldToDataType(), x))) + auto field_data_type = applyVisitor(FieldToDataType(), x); + auto field_data_type_name = field_data_type->getName(); + if (addNewVariant(field_data_type, field_data_type_name)) { - /// Now we should be able to insert this field into extended variant column. - variant_column->insert(x); + /// Insert this field into newly added variant. + auto discr = variant_info.variant_name_to_discriminator[field_data_type_name]; + variant_col.getVariantByGlobalDiscriminator(discr).insert(x); + variant_col.getLocalDiscriminators().push_back(variant_col.localDiscriminatorByGlobal(discr)); + variant_col.getOffsets().push_back(variant_col.getVariantByGlobalDiscriminator(discr).size() - 1); } else { /// We reached maximum number of variants and couldn't add new variant. - /// This case should be really rare in real use cases. - /// We should always be able to add String variant and cast inserted value to String. - addStringVariant(); - variant_column->insert(toString(x)); + /// In this case we add the value of this new variant into special shared variant. + /// We store values in shared variant in binary form with binary encoded type. + auto & shared_variant = getSharedVariant(); + auto & chars = shared_variant.getChars(); + WriteBufferFromVector value_buf(chars, AppendModeTag()); + encodeDataType(field_data_type, value_buf); + getVariantSerialization(field_data_type, field_data_type_name)->serializeBinary(x, value_buf, getFormatSettings()); + value_buf.finalize(); + chars.push_back(0); + shared_variant.getOffsets().push_back(chars.size()); + variant_col.getLocalDiscriminators().push_back(variant_col.localDiscriminatorByGlobal(shared_variant_discr)); + variant_col.getOffsets().push_back(shared_variant.size() - 1); } } -bool ColumnDynamic::tryInsert(const DB::Field & x) +bool ColumnDynamic::tryInsert(const Field & x) { /// We can insert any value into Dynamic column. insert(x); return true; } +Field ColumnDynamic::operator[](size_t n) const +{ + Field res; + get(n, res); + return res; +} + +void ColumnDynamic::get(size_t n, Field & res) const +{ + const auto & variant_col = getVariantColumn(); + /// Check if value is not in shared variant. + if (variant_col.globalDiscriminatorAt(n) != getSharedVariantDiscriminator()) + { + variant_col.get(n, res); + return; + } + + /// We should deeserialize value from shared variant. + const auto & shared_variant = getSharedVariant(); + auto value_data = shared_variant.getDataAt(variant_col.offsetAt(n)); + ReadBufferFromMemory buf(value_data.data, value_data.size); + auto type = decodeDataType(buf); + getVariantSerialization(type)->deserializeBinary(res, buf, getFormatSettings()); +} + #if !defined(DEBUG_OR_SANITIZER_BUILD) -void ColumnDynamic::insertFrom(const DB::IColumn & src_, size_t n) +void ColumnDynamic::insertFrom(const IColumn & src_, size_t n) #else -void ColumnDynamic::doInsertFrom(const DB::IColumn & src_, size_t n) +void ColumnDynamic::doInsertFrom(const IColumn & src_, size_t n) #endif { const auto & dynamic_src = assert_cast(src_); @@ -226,11 +315,33 @@ void ColumnDynamic::doInsertFrom(const DB::IColumn & src_, size_t n) /// Check if we have the same variants in both columns. if (variant_info.variant_name == dynamic_src.variant_info.variant_name) { - variant_column->insertFrom(*dynamic_src.variant_column, n); + variant_column_ptr->insertFrom(*dynamic_src.variant_column, n); return; } - auto & variant_col = assert_cast(*variant_column); + auto & variant_col = getVariantColumn(); + const auto & src_variant_col = dynamic_src.getVariantColumn(); + auto src_global_discr = src_variant_col.globalDiscriminatorAt(n); + auto src_offset = src_variant_col.offsetAt(n); + + /// Check if we insert from shared variant and process it separately. + if (src_global_discr == dynamic_src.getSharedVariantDiscriminator()) + { + const auto & src_shared_variant = dynamic_src.getSharedVariant(); + auto value = src_shared_variant.getDataAt(src_offset); + /// Decode data type of this value. + ReadBufferFromMemory buf(value.data, value.size); + auto type = decodeDataType(buf); + auto type_name = type->getName(); + /// Check if we have this variant and deserialize value into variant from shared variant data. + if (auto it = variant_info.variant_name_to_discriminator.find(type_name); it != variant_info.variant_name_to_discriminator.end()) + variant_col.deserializeBinaryIntoVariant(it->second, getVariantSerialization(type, type_name), buf, getFormatSettings()); + /// Otherwise just insert it into our shared variant. + else + variant_col.insertIntoVariantFrom(getSharedVariantDiscriminator(), src_shared_variant, src_offset); + + return; + } /// If variants are different, we need to extend our variant with new variants. if (auto * global_discriminators_mapping = combineVariants(dynamic_src.variant_info)) @@ -241,8 +352,6 @@ void ColumnDynamic::doInsertFrom(const DB::IColumn & src_, size_t n) /// We cannot combine 2 Variant types as total number of variants exceeds the limit. /// We need to insert single value, try to add only corresponding variant. - const auto & src_variant_col = assert_cast(*dynamic_src.variant_column); - auto src_global_discr = src_variant_col.globalDiscriminatorAt(n); /// NULL doesn't require Variant extension. if (src_global_discr == ColumnVariant::NULL_DISCRIMINATOR) @@ -260,19 +369,18 @@ void ColumnDynamic::doInsertFrom(const DB::IColumn & src_, size_t n) } /// We reached maximum number of variants and couldn't add new variant. - /// We should always be able to add String variant and cast inserted value to String. - addStringVariant(); - auto tmp_variant_column = src_variant_col.getVariantByGlobalDiscriminator(src_global_discr).cloneEmpty(); - tmp_variant_column->insertFrom(src_variant_col.getVariantByGlobalDiscriminator(src_global_discr), src_variant_col.offsetAt(n)); - auto tmp_string_column = castColumn(ColumnWithTypeAndName(tmp_variant_column->getPtr(), variant_type, ""), std::make_shared()); - auto string_variant_discr = variant_info.variant_name_to_discriminator["String"]; - variant_col.insertIntoVariantFrom(string_variant_discr, *tmp_string_column, 0); + /// Insert this value into shared variant. + insertValueIntoSharedVariant( + src_variant_col.getVariantByGlobalDiscriminator(src_global_discr), + variant_type, + dynamic_src.variant_info.variant_names[src_global_discr], + src_offset); } #if !defined(DEBUG_OR_SANITIZER_BUILD) -void ColumnDynamic::insertRangeFrom(const DB::IColumn & src_, size_t start, size_t length) +void ColumnDynamic::insertRangeFrom(const IColumn & src_, size_t start, size_t length) #else -void ColumnDynamic::doInsertRangeFrom(const DB::IColumn & src_, size_t start, size_t length) +void ColumnDynamic::doInsertRangeFrom(const IColumn & src_, size_t start, size_t length) #endif { if (start + length > src_.size()) @@ -280,181 +388,260 @@ void ColumnDynamic::doInsertRangeFrom(const DB::IColumn & src_, size_t start, si "[start({}) + length({}) > src.size()({})]", start, length, src_.size()); const auto & dynamic_src = assert_cast(src_); + auto & variant_col = getVariantColumn(); /// Check if we have the same variants in both columns. if (variant_info.variant_names == dynamic_src.variant_info.variant_names) { - variant_column->insertRangeFrom(*dynamic_src.variant_column, start, length); + variant_col.insertRangeFrom(*dynamic_src.variant_column, start, length); return; } - auto & variant_col = assert_cast(*variant_column); - /// If variants are different, we need to extend our variant with new variants. if (auto * global_discriminators_mapping = combineVariants(dynamic_src.variant_info)) { - variant_col.insertRangeFrom(*dynamic_src.variant_column, start, length, *global_discriminators_mapping); + size_t prev_size = variant_col.size(); + auto shared_variant_discr = getSharedVariantDiscriminator(); + variant_col.insertRangeFrom(*dynamic_src.variant_column, start, length, *global_discriminators_mapping, shared_variant_discr); + + /// We should process insertion from src shared variant separately, because it can contain + /// values that should be extracted into our variants. insertRangeFrom above didn't insert + /// values into our shared variant (we specified shared_variant_discr as special skip discriminator). + + /// Check if src shared variant is empty, nothing to do in this case. + if (dynamic_src.getSharedVariant().empty()) + return; + + /// Iterate over src discriminators and process insertion from src shared variant. + const auto & src_variant_column = dynamic_src.getVariantColumn(); + const auto src_shared_variant_discr = dynamic_src.getSharedVariantDiscriminator(); + const auto src_shared_variant_local_discr = src_variant_column.localDiscriminatorByGlobal(src_shared_variant_discr); + const auto & src_local_discriminators = src_variant_column.getLocalDiscriminators(); + const auto & src_offsets = src_variant_column.getOffsets(); + const auto & src_shared_variant = assert_cast(src_variant_column.getVariantByLocalDiscriminator(src_shared_variant_local_discr)); + + auto & local_discriminators = variant_col.getLocalDiscriminators(); + auto & offsets = variant_col.getOffsets(); + const auto shared_variant_local_discr = variant_col.localDiscriminatorByGlobal(shared_variant_discr); + auto & shared_variant = assert_cast(variant_col.getVariantByLocalDiscriminator(shared_variant_local_discr)); + for (size_t i = 0; i != length; ++i) + { + if (src_local_discriminators[start + i] == src_shared_variant_local_discr) + { + chassert(local_discriminators[prev_size + i] == shared_variant_local_discr); + auto value = src_shared_variant.getDataAt(src_offsets[start + i]); + ReadBufferFromMemory buf(value.data, value.size); + auto type = decodeDataType(buf); + auto type_name = type->getName(); + /// Check if we have variant with this type. In this case we should extract + /// the value from src shared variant and insert it into this variant. + if (auto it = variant_info.variant_name_to_discriminator.find(type_name); it != variant_info.variant_name_to_discriminator.end()) + { + auto local_discr = variant_col.localDiscriminatorByGlobal(it->second); + auto & variant = variant_col.getVariantByLocalDiscriminator(local_discr); + getVariantSerialization(type, type_name)->deserializeBinary(variant, buf, getFormatSettings()); + /// Local discriminators were already filled in ColumnVariant::insertRangeFrom and this row should contain + /// shared_variant_local_discr. Change it to local discriminator of the found variant and update offsets. + local_discriminators[prev_size + i] = local_discr; + offsets[prev_size + i] = variant.size() - 1; + } + /// Otherwise, insert this value into shared variant. + else + { + shared_variant.insertData(value.data, value.size); + /// Update variant offset. + offsets[prev_size + i] = shared_variant.size() - 1; + } + } + } + return; } /// We cannot combine 2 Variant types as total number of variants exceeds the limit. - /// In this case we will add most frequent variants from this range and insert them as usual, - /// all other variants will be converted to String. - /// TODO: instead of keeping all current variants and just adding new most frequent variants - /// from source columns we can also try to replace rarest existing variants with frequent - /// variants from source column (so we will avoid casting new frequent variants to String - /// and keeping rare existing ones). It will require rewriting of existing data in Variant - /// column but will improve usability of Dynamic column for example during squashing blocks - /// during insert. - - const auto & src_variant_column = dynamic_src.getVariantColumn(); - - /// Calculate ranges for each variant in current range. - std::vector> variants_ranges(dynamic_src.variant_info.variant_names.size(), {0, 0}); - /// If we insert the whole column, no need to iterate through the range, we can just take variant sizes. - if (start == 0 && length == dynamic_src.size()) - { - for (size_t i = 0; i != dynamic_src.variant_info.variant_names.size(); ++i) - variants_ranges[i] = {0, src_variant_column.getVariantByGlobalDiscriminator(i).size()}; - } - /// Otherwise we need to iterate through discriminators and calculate the range for each variant. - else - { - const auto & local_discriminators = src_variant_column.getLocalDiscriminators(); - const auto & offsets = src_variant_column.getOffsets(); - size_t end = start + length; - for (size_t i = start; i != end; ++i) - { - auto discr = src_variant_column.globalDiscriminatorByLocal(local_discriminators[i]); - if (discr != ColumnVariant::NULL_DISCRIMINATOR) - { - if (!variants_ranges[discr].second) - variants_ranges[discr].first = offsets[i]; - ++variants_ranges[discr].second; - } - } - } - + /// In this case we will add most frequent variants and insert them as usual, + /// all other variants will be inserted into shared variant. const auto & src_variants = assert_cast(*dynamic_src.variant_info.variant_type).getVariants(); - /// List of variants that will be converted to String. - std::vector variants_to_convert_to_string; /// Mapping from global discriminators of src_variant to the new variant we will create. std::vector other_to_new_discriminators; other_to_new_discriminators.reserve(dynamic_src.variant_info.variant_names.size()); - /// Check if we cannot add any more new variants. In this case we will convert all new variants to String. - if (variant_info.variant_names.size() == max_dynamic_types || (variant_info.variant_names.size() == max_dynamic_types - 1 && !variant_info.variant_name_to_discriminator.contains("String"))) + /// Check if we cannot add any more new variants. In this case we will insert all new variants into shared variant. + if (!canAddNewVariant()) { - addStringVariant(); - for (size_t i = 0; i != dynamic_src.variant_info.variant_names.size(); ++i) - { - auto it = variant_info.variant_name_to_discriminator.find(dynamic_src.variant_info.variant_names[i]); - if (it == variant_info.variant_name_to_discriminator.end()) - { - variants_to_convert_to_string.push_back(i); - other_to_new_discriminators.push_back(variant_info.variant_name_to_discriminator["String"]); - } - else - { - other_to_new_discriminators.push_back(it->second); - } - } - } - /// We still can add some new variants, but not all of them. Let's choose the most frequent variants in specified range. - else - { - std::vector> new_variants_with_sizes; - new_variants_with_sizes.reserve(dynamic_src.variant_info.variant_names.size()); - for (size_t i = 0; i != dynamic_src.variant_info.variant_names.size(); ++i) - { - const auto & variant_name = dynamic_src.variant_info.variant_names[i]; - if (variant_name != "String" && !variant_info.variant_name_to_discriminator.contains(variant_name)) - new_variants_with_sizes.emplace_back(variants_ranges[i].second, i); - } - - std::sort(new_variants_with_sizes.begin(), new_variants_with_sizes.end(), std::greater()); - DataTypes new_variants = assert_cast(*variant_info.variant_type).getVariants(); - if (!variant_info.variant_name_to_discriminator.contains("String")) - new_variants.push_back(std::make_shared()); - - for (const auto & [_, discr] : new_variants_with_sizes) - { - if (new_variants.size() != max_dynamic_types) - new_variants.push_back(src_variants[discr]); - else - variants_to_convert_to_string.push_back(discr); - } - - auto new_variant_type = std::make_shared(new_variants); - updateVariantInfoAndExpandVariantColumn(new_variant_type); - auto string_variant_discriminator = variant_info.variant_name_to_discriminator.at("String"); + auto shared_variant_discr = getSharedVariantDiscriminator(); for (const auto & variant_name : dynamic_src.variant_info.variant_names) { auto it = variant_info.variant_name_to_discriminator.find(variant_name); if (it == variant_info.variant_name_to_discriminator.end()) - other_to_new_discriminators.push_back(string_variant_discriminator); + other_to_new_discriminators.push_back(shared_variant_discr); + else + other_to_new_discriminators.push_back(it->second); + } + } + /// We still can add some new variants, but not all of them. Let's choose the most frequent variants. + else + { + /// Create list of pairs and sort it. + std::vector> new_variants_with_sizes; + new_variants_with_sizes.reserve(dynamic_src.variant_info.variant_names.size()); + const auto & src_variant_column = dynamic_src.getVariantColumn(); + for (const auto & [name, discr] : dynamic_src.variant_info.variant_name_to_discriminator) + { + if (!variant_info.variant_name_to_discriminator.contains(name)) + new_variants_with_sizes.emplace_back(src_variant_column.getVariantByGlobalDiscriminator(discr).size(), discr); + } + + std::sort(new_variants_with_sizes.begin(), new_variants_with_sizes.end(), std::greater()); + DataTypes new_variants = assert_cast(*variant_info.variant_type).getVariants(); + /// Add new variants from sorted list until we reach max_dynamic_types. + for (const auto & [_, discr] : new_variants_with_sizes) + { + if (!canAddNewVariant(new_variants.size())) + break; + new_variants.push_back(src_variants[discr]); + } + + auto new_variant_type = std::make_shared(new_variants); + updateVariantInfoAndExpandVariantColumn(new_variant_type); + auto shared_variant_discr = getSharedVariantDiscriminator(); + for (const auto & variant_name : dynamic_src.variant_info.variant_names) + { + auto it = variant_info.variant_name_to_discriminator.find(variant_name); + if (it == variant_info.variant_name_to_discriminator.end()) + other_to_new_discriminators.push_back(shared_variant_discr); else other_to_new_discriminators.push_back(it->second); } } - /// Convert to String all variants that couldn't be added. - std::unordered_map variants_converted_to_string; - variants_converted_to_string.reserve(variants_to_convert_to_string.size()); - for (auto discr : variants_to_convert_to_string) - { - auto [variant_start, variant_length] = variants_ranges[discr]; - const auto & variant = src_variant_column.getVariantPtrByGlobalDiscriminator(discr); - if (variant_start == 0 && variant_length == variant->size()) - variants_converted_to_string[discr] = castColumn(ColumnWithTypeAndName(variant, src_variants[discr], ""), std::make_shared()); - else - variants_converted_to_string[discr] = castColumn(ColumnWithTypeAndName(variant->cut(variant_start, variant_length), src_variants[discr], ""), std::make_shared()); - } - + /// Iterate over the range and perform insertion. + const auto & src_variant_column = dynamic_src.getVariantColumn(); const auto & src_local_discriminators = src_variant_column.getLocalDiscriminators(); const auto & src_offsets = src_variant_column.getOffsets(); const auto & src_variant_columns = src_variant_column.getVariants(); + const auto src_shared_variant_discr = dynamic_src.getSharedVariantDiscriminator(); + const auto src_shared_variant_local_discr = src_variant_column.localDiscriminatorByGlobal(src_shared_variant_discr); + const auto & src_shared_variant = assert_cast(*src_variant_columns[src_shared_variant_local_discr]); + auto & local_discriminators = variant_col.getLocalDiscriminators(); + local_discriminators.reserve(local_discriminators.size() + length); + auto & offsets = variant_col.getOffsets(); + offsets.reserve(offsets.size() + length); + auto & variant_columns = variant_col.getVariants(); + const auto shared_variant_discr = getSharedVariantDiscriminator(); + const auto shared_variant_local_discr = variant_col.localDiscriminatorByGlobal(shared_variant_discr); + auto & shared_variant = assert_cast(*variant_columns[shared_variant_local_discr]); size_t end = start + length; for (size_t i = start; i != end; ++i) { - auto local_discr = src_local_discriminators[i]; - if (local_discr == ColumnVariant::NULL_DISCRIMINATOR) + auto src_local_discr = src_local_discriminators[i]; + auto src_offset = src_offsets[i]; + if (src_local_discr == ColumnVariant::NULL_DISCRIMINATOR) { - variant_col.insertDefault(); + local_discriminators.push_back(ColumnVariant::NULL_DISCRIMINATOR); + offsets.emplace_back(); } else { - auto global_discr = src_variant_column.globalDiscriminatorByLocal(local_discr); - auto to_global_discr = other_to_new_discriminators[global_discr]; - auto it = variants_converted_to_string.find(global_discr); - if (it == variants_converted_to_string.end()) + /// Process insertion from src shared variant separately. + if (src_local_discr == src_shared_variant_local_discr) { - variant_col.insertIntoVariantFrom(to_global_discr, *src_variant_columns[local_discr], src_offsets[i]); + auto value = src_shared_variant.getDataAt(src_offset); + ReadBufferFromMemory buf(value.data, value.size); + auto type = decodeDataType(buf); + auto type_name = type->getName(); + /// Check if we have variant with this type. In this case we should extract + /// the value from src shared variant and insert it into this variant. + if (auto it = variant_info.variant_name_to_discriminator.find(type_name); it != variant_info.variant_name_to_discriminator.end()) + { + auto local_discr = variant_col.localDiscriminatorByGlobal(it->second); + getVariantSerialization(type, type_name)->deserializeBinary(*variant_columns[local_discr], buf, getFormatSettings()); + local_discriminators.push_back(local_discr); + offsets.push_back(variant_columns[local_discr]->size() - 1); + } + /// Otherwise, insert this value into shared variant. + else + { + shared_variant.insertData(value.data, value.size); + local_discriminators.push_back(shared_variant_local_discr); + offsets.push_back(shared_variant.size() - 1); + } } + /// Insertion from usual variant. else { - variant_col.insertIntoVariantFrom(to_global_discr, *it->second, src_offsets[i] - variants_ranges[global_discr].first); + auto src_global_discr = src_variant_column.globalDiscriminatorByLocal(src_local_discr); + auto global_discr = other_to_new_discriminators[src_global_discr]; + /// Check if we need to insert this value into shared variant. + if (global_discr == shared_variant_discr) + { + serializeValueIntoSharedVariant( + shared_variant, + *src_variant_columns[src_local_discr], + src_variants[src_global_discr], + getVariantSerialization(src_variants[src_global_discr], dynamic_src.variant_info.variant_names[src_global_discr]), + src_offset); + local_discriminators.push_back(shared_variant_local_discr); + offsets.push_back(shared_variant.size() - 1); + } + else + { + auto local_discr = variant_col.localDiscriminatorByGlobal(global_discr); + variant_columns[local_discr]->insertFrom(*src_variant_columns[src_local_discr], src_offset); + local_discriminators.push_back(local_discr); + offsets.push_back(variant_columns[local_discr]->size() - 1); + } } } } } #if !defined(DEBUG_OR_SANITIZER_BUILD) -void ColumnDynamic::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length) +void ColumnDynamic::insertManyFrom(const IColumn & src_, size_t position, size_t length) #else -void ColumnDynamic::doInsertManyFrom(const DB::IColumn & src_, size_t position, size_t length) +void ColumnDynamic::doInsertManyFrom(const IColumn & src_, size_t position, size_t length) #endif { const auto & dynamic_src = assert_cast(src_); + auto & variant_col = getVariantColumn(); /// Check if we have the same variants in both columns. if (variant_info.variant_names == dynamic_src.variant_info.variant_names) { - variant_column->insertManyFrom(*dynamic_src.variant_column, position, length); + variant_col.insertManyFrom(*dynamic_src.variant_column, position, length); return; } - auto & variant_col = assert_cast(*variant_column); + const auto & src_variant_col = assert_cast(*dynamic_src.variant_column); + auto src_global_discr = src_variant_col.globalDiscriminatorAt(position); + auto src_offset = src_variant_col.offsetAt(position); + + /// Check if we insert from shared variant and process it separately. + if (src_global_discr == dynamic_src.getSharedVariantDiscriminator()) + { + const auto & src_shared_variant = dynamic_src.getSharedVariant(); + auto value = src_shared_variant.getDataAt(src_offset); + /// Decode data type of this value. + ReadBufferFromMemory buf(value.data, value.size); + auto type = decodeDataType(buf); + auto type_name = type->getName(); + /// Check if we have this variant and deserialize value into variant from shared variant data. + if (auto it = variant_info.variant_name_to_discriminator.find(type_name); it != variant_info.variant_name_to_discriminator.end()) + { + /// Deserialize value into temporary column and use it in insertManyIntoVariantFrom. + auto tmp_column = type->createColumn(); + tmp_column->reserve(1); + getVariantSerialization(type, type_name)->deserializeBinary(*tmp_column, buf, getFormatSettings()); + variant_col.insertManyIntoVariantFrom(it->second, *tmp_column, 0, length); + } + /// Otherwise just insert it into our shared variant. + else + { + variant_col.insertManyIntoVariantFrom(getSharedVariantDiscriminator(), src_shared_variant, src_offset, length); + } + + return; + } /// If variants are different, we need to extend our variant with new variants. if (auto * global_discriminators_mapping = combineVariants(dynamic_src.variant_info)) @@ -465,8 +652,6 @@ void ColumnDynamic::doInsertManyFrom(const DB::IColumn & src_, size_t position, /// We cannot combine 2 Variant types as total number of variants exceeds the limit. /// We need to insert single value, try to add only corresponding variant. - const auto & src_variant_col = assert_cast(*dynamic_src.variant_column); - auto src_global_discr = src_variant_col.globalDiscriminatorAt(position); if (src_global_discr == ColumnVariant::NULL_DISCRIMINATOR) { insertDefault(); @@ -481,21 +666,51 @@ void ColumnDynamic::doInsertManyFrom(const DB::IColumn & src_, size_t position, return; } - addStringVariant(); - auto tmp_variant_column = src_variant_col.getVariantByGlobalDiscriminator(src_global_discr).cloneEmpty(); - tmp_variant_column->insertFrom(src_variant_col.getVariantByGlobalDiscriminator(src_global_discr), src_variant_col.offsetAt(position)); - auto tmp_string_column = castColumn(ColumnWithTypeAndName(tmp_variant_column->getPtr(), variant_type, ""), std::make_shared()); - auto string_variant_discr = variant_info.variant_name_to_discriminator["String"]; - variant_col.insertManyIntoVariantFrom(string_variant_discr, *tmp_string_column, 0, length); + /// We reached maximum number of variants and couldn't add new variant. + /// Insert this value into shared variant. + /// Create temporary string column, serialize value into it and use it in insertManyIntoVariantFrom. + auto tmp_shared_variant = ColumnString::create(); + serializeValueIntoSharedVariant( + *tmp_shared_variant, + src_variant_col.getVariantByGlobalDiscriminator(src_global_discr), + variant_type, + getVariantSerialization(variant_type, dynamic_src.variant_info.variant_names[src_global_discr]), + src_offset); + + variant_col.insertManyIntoVariantFrom(getSharedVariantDiscriminator(), *tmp_shared_variant, 0, length); } +void ColumnDynamic::insertValueIntoSharedVariant(const IColumn & src, const DataTypePtr & type, const String & type_name, size_t n) +{ + auto & variant_col = getVariantColumn(); + auto & shared_variant = getSharedVariant(); + serializeValueIntoSharedVariant(shared_variant, src, type, getVariantSerialization(type, type_name), n); + variant_col.getLocalDiscriminators().push_back(variant_col.localDiscriminatorByGlobal(getSharedVariantDiscriminator())); + variant_col.getOffsets().push_back(shared_variant.size() - 1); +} -StringRef ColumnDynamic::serializeValueIntoArena(size_t n, DB::Arena & arena, const char *& begin) const +void ColumnDynamic::serializeValueIntoSharedVariant( + ColumnString & shared_variant, + const IColumn & src, + const DataTypePtr & type, + const SerializationPtr & serialization, + size_t n) +{ + auto & chars = shared_variant.getChars(); + WriteBufferFromVector value_buf(chars, AppendModeTag()); + encodeDataType(type, value_buf); + serialization->serializeBinary(src, n, value_buf, getFormatSettings()); + value_buf.finalize(); + chars.push_back(0); + shared_variant.getOffsets().push_back(chars.size()); +} + +StringRef ColumnDynamic::serializeValueIntoArena(size_t n, Arena & arena, const char *& begin) const { /// We cannot use Variant serialization here as it serializes discriminator + value, /// but Dynamic doesn't have fixed mapping discriminator <-> variant type /// as different Dynamic column can have different Variants. - /// Instead, we serialize null bit + variant type in binary format (size + bytes) + value. + /// Instead, we serialize null bit + variant type and value in binary format (size + data). const auto & variant_col = assert_cast(*variant_column); auto discr = variant_col.globalDiscriminatorAt(n); StringRef res; @@ -509,25 +724,35 @@ StringRef ColumnDynamic::serializeValueIntoArena(size_t n, DB::Arena & arena, co return res; } - const auto & variant_type = assert_cast(*variant_info.variant_type).getVariant(discr); - String variant_type_binary_data = encodeDataType(variant_type); - size_t variant_type_binary_data_size = variant_type_binary_data.size(); - char * pos = arena.allocContinue(sizeof(UInt8) + sizeof(size_t) + variant_type_binary_data.size(), begin); - memcpy(pos, &null_bit, sizeof(UInt8)); - memcpy(pos + sizeof(UInt8), &variant_type_binary_data_size, sizeof(size_t)); - memcpy(pos + sizeof(UInt8) + sizeof(size_t), variant_type_binary_data.data(), variant_type_binary_data.size()); - res.data = pos; - res.size = sizeof(UInt8) + sizeof(size_t) + variant_type_binary_data.size(); + WriteBufferFromOwnString buf; + StringRef type_and_value; + /// If we have value from shared variant, it's already stored in the desired format. + if (discr == getSharedVariantDiscriminator()) + { + type_and_value = getSharedVariant().getDataAt(variant_col.offsetAt(n)); + } + /// For regular variants serialize its type and value in binary format. + else + { + const auto & variant_type = assert_cast(*variant_info.variant_type).getVariant(discr); + encodeDataType(variant_type, buf); + getVariantSerialization(variant_type, variant_info.variant_names[discr]) + ->serializeBinary(variant_col.getVariantByGlobalDiscriminator(discr), variant_col.offsetAt(n), buf, getFormatSettings()); + type_and_value = buf.str(); + } - auto value_ref = variant_col.getVariantByGlobalDiscriminator(discr).serializeValueIntoArena(variant_col.offsetAt(n), arena, begin); - res.data = value_ref.data - res.size; - res.size += value_ref.size; + char * pos = arena.allocContinue(sizeof(UInt8) + sizeof(size_t) + type_and_value.size, begin); + memcpy(pos, &null_bit, sizeof(UInt8)); + memcpy(pos + sizeof(UInt8), &type_and_value.size, sizeof(size_t)); + memcpy(pos + sizeof(UInt8) + sizeof(size_t), type_and_value.data, type_and_value.size); + res.data = pos; + res.size = sizeof(UInt8) + sizeof(size_t) + type_and_value.size; return res; } const char * ColumnDynamic::deserializeAndInsertFromArena(const char * pos) { - auto & variant_col = assert_cast(*variant_column); + auto & variant_col = getVariantColumn(); UInt8 null_bit = unalignedLoad(pos); pos += sizeof(UInt8); if (null_bit) @@ -536,39 +761,36 @@ const char * ColumnDynamic::deserializeAndInsertFromArena(const char * pos) return pos; } - /// Read variant type in binary format. - const size_t variant_type_binary_data_size = unalignedLoad(pos); - pos += sizeof(variant_type_binary_data_size); - String variant_type_binary_data; - variant_type_binary_data.resize(variant_type_binary_data_size); - memcpy(variant_type_binary_data.data(), pos, variant_type_binary_data_size); - pos += variant_type_binary_data_size; - auto variant_type = decodeDataType(variant_type_binary_data); + /// Read variant type and value in binary format. + const size_t type_and_value_size = unalignedLoad(pos); + pos += sizeof(type_and_value_size); + std::string_view type_and_value(pos, type_and_value_size); + pos += type_and_value_size; + + ReadBufferFromMemory buf(type_and_value.data(), type_and_value.size()); + auto variant_type = decodeDataType(buf); auto variant_name = variant_type->getName(); /// If we already have such variant, just deserialize it into corresponding variant column. auto it = variant_info.variant_name_to_discriminator.find(variant_name); if (it != variant_info.variant_name_to_discriminator.end()) { - auto discr = it->second; - return variant_col.deserializeVariantAndInsertFromArena(discr, pos); + variant_col.deserializeBinaryIntoVariant(it->second, getVariantSerialization(variant_type, variant_name), buf, getFormatSettings()); } - - /// If we don't have such variant, add it. - if (likely(addNewVariant(variant_type))) + /// If we don't have such variant, try to add it. + else if (likely(addNewVariant(variant_type))) { auto discr = variant_info.variant_name_to_discriminator[variant_name]; - return variant_col.deserializeVariantAndInsertFromArena(discr, pos); + variant_col.deserializeBinaryIntoVariant(discr, getVariantSerialization(variant_type, variant_name), buf, getFormatSettings()); + } + /// Otherwise insert this value into shared variant. + else + { + auto & shared_variant = getSharedVariant(); + shared_variant.insertData(type_and_value.data(), type_and_value.size()); + variant_col.getLocalDiscriminators().push_back(variant_col.localDiscriminatorByGlobal(getSharedVariantDiscriminator())); + variant_col.getOffsets().push_back(shared_variant.size() - 1); } - /// We reached maximum number of variants and couldn't add new variant. - /// We should always be able to add String variant and cast inserted value to String. - addStringVariant(); - /// Create temporary column of this variant type and deserialize value into it. - auto tmp_variant_column = variant_type->createColumn(); - pos = tmp_variant_column->deserializeAndInsertFromArena(pos); - /// Cast temporary column to String and insert this value into String variant. - auto str_column = castColumn(ColumnWithTypeAndName(tmp_variant_column->getPtr(), variant_type, ""), std::make_shared()); - variant_col.insertIntoVariantFrom(variant_info.variant_name_to_discriminator["String"], *str_column, 0); return pos; } @@ -579,19 +801,15 @@ const char * ColumnDynamic::skipSerializedInArena(const char * pos) const if (null_bit) return pos; - const size_t variant_type_binary_data_size = unalignedLoad(pos); - pos += sizeof(variant_type_binary_data_size); - String variant_type_binary_data; - variant_type_binary_data.resize(variant_type_binary_data_size); - memcpy(variant_type_binary_data.data(), pos, variant_type_binary_data_size); - pos += variant_type_binary_data_size; - auto tmp_variant_column = decodeDataType(variant_type_binary_data)->createColumn(); - return tmp_variant_column->skipSerializedInArena(pos); + const size_t type_and_value_size = unalignedLoad(pos); + pos += sizeof(type_and_value_size); + pos += type_and_value_size; + return pos; } void ColumnDynamic::updateHashWithValue(size_t n, SipHash & hash) const { - const auto & variant_col = assert_cast(*variant_column); + const auto & variant_col = getVariantColumn(); auto discr = variant_col.globalDiscriminatorAt(n); if (discr == ColumnVariant::NULL_DISCRIMINATOR) { @@ -604,17 +822,19 @@ void ColumnDynamic::updateHashWithValue(size_t n, SipHash & hash) const } #if !defined(DEBUG_OR_SANITIZER_BUILD) -int ColumnDynamic::compareAt(size_t n, size_t m, const DB::IColumn & rhs, int nan_direction_hint) const +int ColumnDynamic::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const #else -int ColumnDynamic::doCompareAt(size_t n, size_t m, const DB::IColumn & rhs, int nan_direction_hint) const +int ColumnDynamic::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const #endif { - const auto & left_variant = assert_cast(*variant_column); + const auto & left_variant = getVariantColumn(); const auto & right_dynamic = assert_cast(rhs); - const auto & right_variant = assert_cast(*right_dynamic.variant_column); + const auto & right_variant = right_dynamic.getVariantColumn(); auto left_discr = left_variant.globalDiscriminatorAt(n); + auto left_shared_variant_discr = getSharedVariantDiscriminator(); auto right_discr = right_variant.globalDiscriminatorAt(m); + auto right_shared_variant_discr = right_dynamic.getSharedVariantDiscriminator(); /// Check if we have NULLs and return result based on nan_direction_hint. if (left_discr == ColumnVariant::NULL_DISCRIMINATOR && right_discr == ColumnVariant::NULL_DISCRIMINATOR) @@ -624,25 +844,265 @@ int ColumnDynamic::doCompareAt(size_t n, size_t m, const DB::IColumn & rhs, int else if (right_discr == ColumnVariant::NULL_DISCRIMINATOR) return -nan_direction_hint; - /// If rows have different types, we compare type names. - if (variant_info.variant_names[left_discr] != right_dynamic.variant_info.variant_names[right_discr]) - return variant_info.variant_names[left_discr] < right_dynamic.variant_info.variant_names[right_discr] ? -1 : 1; + /// Check if both values are in shared variant. + if (left_discr == left_shared_variant_discr && right_discr == right_shared_variant_discr) + { + /// First check if both type and value are equal. + auto left_value = getSharedVariant().getDataAt(left_variant.offsetAt(n)); + auto right_value = right_dynamic.getSharedVariant().getDataAt(right_variant.offsetAt(m)); + if (left_value == right_value) + return 0; - /// If rows have the same types, compare actual values from corresponding variants. - return left_variant.getVariantByGlobalDiscriminator(left_discr).compareAt(left_variant.offsetAt(n), right_variant.offsetAt(m), right_variant.getVariantByGlobalDiscriminator(right_discr), nan_direction_hint); + /// Extract type names from both values. + ReadBufferFromMemory buf_left(left_value.data, left_value.size); + auto left_data_type = decodeDataType(buf_left); + auto left_data_type_name = left_data_type->getName(); + + ReadBufferFromMemory buf_right(right_value.data, right_value.size); + auto right_data_type = decodeDataType(buf_right); + auto right_data_type_name = right_data_type->getName(); + + /// If rows have different types, we compare type names. + if (left_data_type_name != right_data_type_name) + return left_data_type_name < right_data_type_name ? -1 : 1; + + /// If rows have the same type, we compare actual values. + /// We have both values serialized in binary format, so we need to + /// create temporary column, insert both values into it and compare. + auto tmp_column = left_data_type->createColumn(); + const auto & serialization = getVariantSerialization(left_data_type, left_data_type_name); + serialization->deserializeBinary(*tmp_column, buf_left, getFormatSettings()); + serialization->deserializeBinary(*tmp_column, buf_right, getFormatSettings()); + return tmp_column->compareAt(0, 1, *tmp_column, nan_direction_hint); + } + /// Check if only left value is in shared data. + else if (left_discr == left_shared_variant_discr) + { + /// Extract left type name from the value. + auto left_value = getSharedVariant().getDataAt(left_variant.offsetAt(n)); + ReadBufferFromMemory buf_left(left_value.data, left_value.size); + auto left_data_type = decodeDataType(buf_left); + auto left_data_type_name = left_data_type->getName(); + + /// If rows have different types, we compare type names. + if (left_data_type_name != right_dynamic.variant_info.variant_names[right_discr]) + return left_data_type_name < right_dynamic.variant_info.variant_names[right_discr] ? -1 : 1; + + /// If rows have the same type, we compare actual values. + /// We have left value serialized in binary format, we need to + /// create temporary column, insert the value into it and compare. + auto tmp_column = left_data_type->createColumn(); + getVariantSerialization(left_data_type, left_data_type_name)->deserializeBinary(*tmp_column, buf_left, getFormatSettings()); + return tmp_column->compareAt(0, right_variant.offsetAt(m), right_variant.getVariantByGlobalDiscriminator(right_discr), nan_direction_hint); + } + /// Check if only right value is in shared data. + else if (right_discr == right_shared_variant_discr) + { + /// Extract right type name from the value. + auto right_value = right_dynamic.getSharedVariant().getDataAt(right_variant.offsetAt(m)); + ReadBufferFromMemory buf_right(right_value.data, right_value.size); + auto right_data_type = decodeDataType(buf_right); + auto right_data_type_name = right_data_type->getName(); + + /// If rows have different types, we compare type names. + if (variant_info.variant_names[left_discr] != right_data_type_name) + return variant_info.variant_names[left_discr] < right_data_type_name ? -1 : 1; + + /// If rows have the same type, we compare actual values. + /// We have right value serialized in binary format, we need to + /// create temporary column, insert the value into it and compare. + auto tmp_column = right_data_type->createColumn(); + getVariantSerialization(right_data_type, right_data_type_name)->deserializeBinary(*tmp_column, buf_right, getFormatSettings()); + return left_variant.getVariantByGlobalDiscriminator(left_discr).compareAt(left_variant.offsetAt(n), 0, *tmp_column, nan_direction_hint); + } + /// Otherwise both values are regular variants. + else + { + /// If rows have different types, we compare type names. + if (variant_info.variant_names[left_discr] != right_dynamic.variant_info.variant_names[right_discr]) + return variant_info.variant_names[left_discr] < right_dynamic.variant_info.variant_names[right_discr] ? -1 : 1; + + /// If rows have the same types, compare actual values from corresponding variants. + return left_variant.getVariantByGlobalDiscriminator(left_discr).compareAt(left_variant.offsetAt(n), right_variant.offsetAt(m), right_variant.getVariantByGlobalDiscriminator(right_discr), nan_direction_hint); + } +} + +struct ColumnDynamic::ComparatorBase +{ + const ColumnDynamic & parent; + int nan_direction_hint; + + ComparatorBase(const ColumnDynamic & parent_, int nan_direction_hint_) + : parent(parent_), nan_direction_hint(nan_direction_hint_) + { + } + + ALWAYS_INLINE int compare(size_t lhs, size_t rhs) const + { + return parent.compareAt(lhs, rhs, parent, nan_direction_hint); + } +}; + +void ColumnDynamic::getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, size_t limit, int nan_direction_hint, IColumn::Permutation & res) const +{ + if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Unstable) + getPermutationImpl(limit, res, ComparatorAscendingUnstable(*this, nan_direction_hint), DefaultSort(), DefaultPartialSort()); + else if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Stable) + getPermutationImpl(limit, res, ComparatorAscendingStable(*this, nan_direction_hint), DefaultSort(), DefaultPartialSort()); + else if (direction == IColumn::PermutationSortDirection::Descending && stability == IColumn::PermutationSortStability::Unstable) + getPermutationImpl(limit, res, ComparatorDescendingUnstable(*this, nan_direction_hint), DefaultSort(), DefaultPartialSort()); + else if (direction == IColumn::PermutationSortDirection::Descending && stability == IColumn::PermutationSortStability::Stable) + getPermutationImpl(limit, res, ComparatorDescendingStable(*this, nan_direction_hint), DefaultSort(), DefaultPartialSort()); +} + +void ColumnDynamic::updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, size_t limit, int nan_direction_hint, IColumn::Permutation & res, DB::EqualRanges & equal_ranges) const +{ + auto comparator_equal = ComparatorEqual(*this, nan_direction_hint); + + if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Unstable) + updatePermutationImpl(limit, res, equal_ranges, ComparatorAscendingUnstable(*this, nan_direction_hint), comparator_equal, DefaultSort(), DefaultPartialSort()); + else if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Stable) + updatePermutationImpl(limit, res, equal_ranges, ComparatorAscendingStable(*this, nan_direction_hint), comparator_equal, DefaultSort(), DefaultPartialSort()); + else if (direction == IColumn::PermutationSortDirection::Descending && stability == IColumn::PermutationSortStability::Unstable) + updatePermutationImpl(limit, res, equal_ranges, ComparatorDescendingUnstable(*this, nan_direction_hint), comparator_equal, DefaultSort(), DefaultPartialSort()); + else if (direction == IColumn::PermutationSortDirection::Descending && stability == IColumn::PermutationSortStability::Stable) + updatePermutationImpl(limit, res, equal_ranges, ComparatorDescendingStable(*this, nan_direction_hint), comparator_equal, DefaultSort(), DefaultPartialSort()); } ColumnPtr ColumnDynamic::compress() const { - ColumnPtr variant_compressed = variant_column->compress(); + ColumnPtr variant_compressed = variant_column_ptr->compress(); size_t byte_size = variant_compressed->byteSize(); return ColumnCompressed::create(size(), byte_size, - [my_variant_compressed = std::move(variant_compressed), my_variant_info = variant_info, my_max_dynamic_types = max_dynamic_types, my_statistics = statistics]() mutable + [my_variant_compressed = std::move(variant_compressed), my_variant_info = variant_info, my_max_dynamic_types = max_dynamic_types, my_global_max_dynamic_types = global_max_dynamic_types, my_statistics = statistics]() mutable { - return ColumnDynamic::create(my_variant_compressed->decompress(), my_variant_info, my_max_dynamic_types, my_statistics); + return ColumnDynamic::create(my_variant_compressed->decompress(), my_variant_info, my_max_dynamic_types, my_global_max_dynamic_types, my_statistics); }); } +void ColumnDynamic::prepareForSquashing(const Columns & source_columns) +{ + if (source_columns.empty()) + return; + + /// Internal variants of source dynamic columns may differ. + /// We want to preallocate memory for all variants we will have after squashing. + /// It may happen that the total number of variants in source columns will + /// exceed the limit, in this case we will choose the most frequent variants + /// and insert the rest types into the shared variant. + + /// First, preallocate memory for variant discriminators and offsets. + size_t new_size = size(); + for (const auto & source_column : source_columns) + new_size += source_column->size(); + auto & variant_col = getVariantColumn(); + variant_col.getLocalDiscriminators().reserve_exact(new_size); + variant_col.getOffsets().reserve_exact(new_size); + + /// Second, preallocate memory for variants. + prepareVariantsForSquashing(source_columns); +} + +void ColumnDynamic::prepareVariantsForSquashing(const Columns & source_columns) +{ + /// Internal variants of source dynamic columns may differ. + /// We want to preallocate memory for all variants we will have after squashing. + /// It may happen that the total number of variants in source columns will + /// exceed the limit, in this case we will choose the most frequent variants. + + /// Collect all variants and their total sizes. + std::unordered_map total_variant_sizes; + DataTypes all_variants; + + auto add_variants = [&](const ColumnDynamic & source_dynamic) + { + const auto & source_variant_column = source_dynamic.getVariantColumn(); + const auto & source_variant_info = source_dynamic.getVariantInfo(); + const auto & source_variants = assert_cast(*source_variant_info.variant_type).getVariants(); + + for (size_t i = 0; i != source_variants.size(); ++i) + { + const auto & variant_name = source_variant_info.variant_names[i]; + auto it = total_variant_sizes.find(variant_name); + /// Add this variant to the list of all variants if we didn't see it yet. + if (it == total_variant_sizes.end()) + { + all_variants.push_back(source_variants[i]); + it = total_variant_sizes.emplace(variant_name, 0).first; + } + + it->second += source_variant_column.getVariantByGlobalDiscriminator(i).size(); + } + }; + + for (const auto & source_column : source_columns) + add_variants(assert_cast(*source_column)); + + /// Add variants from this dynamic column. + add_variants(*this); + + DataTypePtr result_variant_type; + /// Check if the number of all variants exceeds the limit. + if (!canAddNewVariants(0, all_variants.size())) + { + /// We want to keep the most frequent variants in the resulting dynamic column. + DataTypes result_variants; + result_variants.reserve(max_dynamic_types + 1); /// +1 for shared variant. + /// Add variants from current variant column as we will not rewrite it. + for (const auto & variant : assert_cast(*variant_info.variant_type).getVariants()) + result_variants.push_back(variant); + + /// Create list of remaining variants with their sizes and sort it. + std::vector> variants_with_sizes; + variants_with_sizes.reserve(all_variants.size() - variant_info.variant_names.size()); + for (const auto & variant : all_variants) + { + /// Add variant to the list only of we didn't add it yet. + auto variant_name = variant->getName(); + if (!variant_info.variant_name_to_discriminator.contains(variant_name)) + variants_with_sizes.emplace_back(total_variant_sizes[variant_name], variant); + } + + std::sort(variants_with_sizes.begin(), variants_with_sizes.end(), std::greater()); + /// Add the most frequent variants until we reach max_dynamic_types. + for (const auto & [_, new_variant] : variants_with_sizes) + { + if (!canAddNewVariant(result_variants.size())) + break; + result_variants.push_back(new_variant); + } + + result_variant_type = std::make_shared(result_variants); + } + else + { + result_variant_type = std::make_shared(all_variants); + } + + if (!result_variant_type->equals(*variant_info.variant_type)) + updateVariantInfoAndExpandVariantColumn(result_variant_type); + + /// Now current dynamic column has all resulting variants and we can call + /// prepareForSquashing on them to preallocate the memory. + auto & variant_col = getVariantColumn(); + for (size_t i = 0; i != variant_info.variant_names.size(); ++i) + { + Columns source_variant_columns; + source_variant_columns.reserve(source_columns.size()); + for (const auto & source_column : source_columns) + { + const auto & source_dynamic_column = assert_cast(*source_column); + const auto & source_variant_info = source_dynamic_column.getVariantInfo(); + /// Try to find this variant in the current source column. + auto it = source_variant_info.variant_name_to_discriminator.find(variant_info.variant_names[i]); + if (it != source_variant_info.variant_name_to_discriminator.end()) + source_variant_columns.push_back(source_dynamic_column.getVariantColumn().getVariantPtrByGlobalDiscriminator(it->second)); + } + + variant_col.getVariantByGlobalDiscriminator(i).prepareForSquashing(source_variant_columns); + } +} + void ColumnDynamic::takeDynamicStructureFromSourceColumns(const Columns & source_columns) { if (!empty()) @@ -663,6 +1123,9 @@ void ColumnDynamic::takeDynamicStructureFromSourceColumns(const Columns & source /// First, collect all variants from all source columns and calculate total sizes. std::unordered_map total_sizes; DataTypes all_variants; + /// Add shared variant type in advance; + all_variants.push_back(getSharedVariantDataType()); + total_sizes[getSharedVariantTypeName()] = 0; for (const auto & source_column : source_columns) { @@ -671,7 +1134,7 @@ void ColumnDynamic::takeDynamicStructureFromSourceColumns(const Columns & source const auto & source_variant_info = source_dynamic.getVariantInfo(); const auto & source_variants = assert_cast(*source_variant_info.variant_type).getVariants(); /// During deserialization from MergeTree we will have variant sizes statistics from the whole data part. - const auto & source_statistics = source_dynamic.getStatistics(); + const auto & source_statistics = source_dynamic.getStatistics(); for (size_t i = 0; i != source_variants.size(); ++i) { const auto & variant_name = source_variant_info.variant_names[i]; @@ -682,35 +1145,68 @@ void ColumnDynamic::takeDynamicStructureFromSourceColumns(const Columns & source all_variants.push_back(source_variants[i]); it = total_sizes.emplace(variant_name, 0).first; } - auto statistics_it = source_statistics.data.find(variant_name); - size_t size = statistics_it == source_statistics.data.end() ? source_variant_column.getVariantByGlobalDiscriminator(i).size() : statistics_it->second; + size_t size = source_variant_column.getVariantByGlobalDiscriminator(i).size(); + if (source_statistics) + { + auto statistics_it = source_statistics->variants_statistics.find(variant_name); + if (statistics_it != source_statistics->variants_statistics.end()) + size = statistics_it->second; + } + it->second += size; } + + /// Use add variants from shared variant statistics. It can help extracting + /// frequent variants from shared variant to usual variants. + if (source_statistics) + { + for (const auto & [variant_name, size] : source_statistics->shared_variants_statistics) + { + auto it = total_sizes.find(variant_name); + /// Add this variant to the list of all variants if we didn't see it yet. + if (it == total_sizes.end()) + { + all_variants.push_back(DataTypeFactory::instance().get(variant_name)); + it = total_sizes.emplace(variant_name, 0).first; + } + it->second += size; + } + } } DataTypePtr result_variant_type; - /// Check if the number of all variants exceeds the limit. - if (all_variants.size() > max_dynamic_types || (all_variants.size() == max_dynamic_types && !total_sizes.contains("String"))) + Statistics new_statistics(Statistics::Source::MERGE); + /// Reset max_dynamic_types to global_max_dynamic_types. + max_dynamic_types = global_max_dynamic_types; + /// Check if the number of all dynamic types exceeds the limit. + if (!canAddNewVariants(0, all_variants.size())) { - /// Create list of variants with their sizes and sort it. - std::vector> variants_with_sizes; + /// Create a list of variants with their sizes and names and then sort it. + std::vector> variants_with_sizes; variants_with_sizes.reserve(all_variants.size()); for (const auto & variant : all_variants) - variants_with_sizes.emplace_back(total_sizes[variant->getName()], variant); + { + auto variant_name = variant->getName(); + if (variant_name != getSharedVariantTypeName()) + variants_with_sizes.emplace_back(total_sizes[variant_name], variant_name, variant); + } std::sort(variants_with_sizes.begin(), variants_with_sizes.end(), std::greater()); - /// Take first max_dynamic_types variants from sorted list. + /// Take first max_dynamic_types variants from sorted list and fill shared_variants_statistics with the rest. DataTypes result_variants; - result_variants.reserve(max_dynamic_types); - /// Add String variant in advance. - result_variants.push_back(std::make_shared()); - for (const auto & [_, variant] : variants_with_sizes) + result_variants.reserve(max_dynamic_types + 1); /// +1 for shared variant. + /// Add shared variant. + result_variants.push_back(getSharedVariantDataType()); + for (const auto & [size, variant_name, variant_type] : variants_with_sizes) { - if (result_variants.size() == max_dynamic_types) + /// Add variant to the resulting variants list until we reach max_dynamic_types. + if (canAddNewVariant(result_variants.size())) + result_variants.push_back(variant_type); + /// Add all remaining variants into shared_variants_statistics until we reach its max size. + else if (new_statistics.shared_variants_statistics.size() < Statistics::MAX_SHARED_VARIANT_STATISTICS_SIZE) + new_statistics.shared_variants_statistics[variant_name] = size; + else break; - - if (variant->getName() != "String") - result_variants.push_back(variant); } result_variant_type = std::make_shared(result_variants); @@ -720,26 +1216,17 @@ void ColumnDynamic::takeDynamicStructureFromSourceColumns(const Columns & source result_variant_type = std::make_shared(all_variants); } - /// Now we have resulting Variant and can fill variant info. - variant_info.variant_type = result_variant_type; - variant_info.variant_name = result_variant_type->getName(); - const auto & result_variants = assert_cast(*result_variant_type).getVariants(); - variant_info.variant_names.clear(); - variant_info.variant_names.reserve(result_variants.size()); - variant_info.variant_name_to_discriminator.clear(); - variant_info.variant_name_to_discriminator.reserve(result_variants.size()); - statistics.data.clear(); - statistics.data.reserve(result_variants.size()); - statistics.source = Statistics::Source::MERGE; - for (size_t i = 0; i != result_variants.size(); ++i) - { - auto variant_name = result_variants[i]->getName(); - variant_info.variant_names.push_back(variant_name); - variant_info.variant_name_to_discriminator[variant_name] = i; - statistics.data[variant_name] = total_sizes[variant_name]; - } + /// Now we have resulting Variant and can fill variant info and create merge statistics. + setVariantType(result_variant_type); + new_statistics.variants_statistics.reserve(variant_info.variant_names.size()); + for (const auto & variant_name : variant_info.variant_names) + new_statistics.variants_statistics[variant_name] = total_sizes[variant_name]; + statistics = std::make_shared(std::move(new_statistics)); - variant_column = variant_info.variant_type->createColumn(); + /// Reduce max_dynamic_types to the number of selected variants, so there will be no possibility + /// to extend selected variants on inerts into this column during merges. + /// -1 because we don't count shared variant in the limit. + max_dynamic_types = variant_info.variant_names.size() - 1; /// Now we have the resulting Variant that will be used in all merged columns. /// Variants can also contain Dynamic columns inside, we should collect @@ -755,7 +1242,7 @@ void ColumnDynamic::takeDynamicStructureFromSourceColumns(const Columns & source { /// Try to find this variant in current source column. auto it = source_variant_info.variant_name_to_discriminator.find(variant_info.variant_names[i]); - if (it != source_variant_info.variant_name_to_discriminator.end()) + if (it != source_variant_info.variant_name_to_discriminator.end()) /// Add shared variant. variants_source_columns[i].push_back(source_dynamic_column.getVariantColumn().getVariantPtrByGlobalDiscriminator(it->second)); } } @@ -767,12 +1254,12 @@ void ColumnDynamic::takeDynamicStructureFromSourceColumns(const Columns & source void ColumnDynamic::applyNullMap(const ColumnVector::Container & null_map) { - assert_cast(*variant_column).applyNullMap(null_map); + variant_column_ptr->applyNullMap(null_map); } void ColumnDynamic::applyNegatedNullMap(const ColumnVector::Container & null_map) { - assert_cast(*variant_column).applyNegatedNullMap(null_map); + variant_column_ptr->applyNegatedNullMap(null_map); } } diff --git a/src/Columns/ColumnDynamic.h b/src/Columns/ColumnDynamic.h index e92cabd3db9..2ae862de3af 100644 --- a/src/Columns/ColumnDynamic.h +++ b/src/Columns/ColumnDynamic.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -19,11 +20,19 @@ namespace DB * * When new values are inserted into Dynamic column, the internal Variant * type and column are extended if the inserted value has new type. + * When the limit on number of dynamic types is exceeded, all values + * with new types are inserted into special shared variant with type String + * that contains values and their types in binary format. */ class ColumnDynamic final : public COWHelper, ColumnDynamic> { public: - /// + /// Maximum limit on dynamic types. We use ColumnVariant to store all the types, + /// so the limit cannot be greater then ColumnVariant::MAX_NESTED_COLUMNS. + /// We also always have reserved variant for shared variant. + static constexpr size_t MAX_DYNAMIC_TYPES_LIMIT = ColumnVariant::MAX_NESTED_COLUMNS - 1; + static constexpr const char * SHARED_VARIANT_TYPE_NAME = "SharedVariant"; + struct Statistics { enum class Source @@ -32,12 +41,27 @@ public: MERGE, /// Statistics were calculated during merge of several MergeTree parts. }; + explicit Statistics(Source source_) : source(source_) {} + /// Source of the statistics. Source source; - /// Statistics data: (variant name) -> (total variant size in data part). - std::unordered_map data; + /// Statistics data for usual variants: (variant name) -> (total variant size in data part). + std::unordered_map variants_statistics; + /// Statistics data for variants from shared variant: (variant name) -> (total variant size in data part). + /// For shared variant we store statistics only for first 256 variants (should cover almost all cases and it's not expensive). + static constexpr const size_t MAX_SHARED_VARIANT_STATISTICS_SIZE = 256; + std::unordered_map shared_variants_statistics; }; + using StatisticsPtr = std::shared_ptr; + + struct ComparatorBase; + using ComparatorAscendingUnstable = ComparatorAscendingUnstableImpl; + using ComparatorAscendingStable = ComparatorAscendingStableImpl; + using ComparatorDescendingUnstable = ComparatorDescendingUnstableImpl; + using ComparatorDescendingStable = ComparatorDescendingStableImpl; + using ComparatorEqual = ComparatorEqualImpl; + private: friend class COWHelper, ColumnDynamic>; @@ -54,36 +78,40 @@ private: }; explicit ColumnDynamic(size_t max_dynamic_types_); - ColumnDynamic(MutableColumnPtr variant_column_, const VariantInfo & variant_info_, size_t max_dynamic_types_, const Statistics & statistics_ = {}); + ColumnDynamic(MutableColumnPtr variant_column_, const DataTypePtr & variant_type_, size_t max_dynamic_types_, size_t global_max_dynamic_types_, const StatisticsPtr & statistics_ = {}); + ColumnDynamic(MutableColumnPtr variant_column_, const VariantInfo & variant_info_, size_t max_dynamic_types_, size_t global_max_dynamic_types_, const StatisticsPtr & statistics_ = {}); public: /** Create immutable column using immutable arguments. This arguments may be shared with other columns. * Use IColumn::mutate in order to make mutable column and mutate shared nested columns. */ using Base = COWHelper, ColumnDynamic>; - static Ptr create(const ColumnPtr & variant_column_, const VariantInfo & variant_info_, size_t max_dynamic_types_, const Statistics & statistics_ = {}) + static Ptr create(const ColumnPtr & variant_column_, const VariantInfo & variant_info_, size_t max_dynamic_types_, size_t global_max_dynamic_types_, const StatisticsPtr & statistics_ = {}) { - return ColumnDynamic::create(variant_column_->assumeMutable(), variant_info_, max_dynamic_types_, statistics_); + return ColumnDynamic::create(variant_column_->assumeMutable(), variant_info_, max_dynamic_types_, global_max_dynamic_types_, statistics_); } - static MutablePtr create(MutableColumnPtr variant_column_, const VariantInfo & variant_info_, size_t max_dynamic_types_, const Statistics & statistics_ = {}) + static MutablePtr create(MutableColumnPtr variant_column_, const VariantInfo & variant_info_, size_t max_dynamic_types_, size_t global_max_dynamic_types_, const StatisticsPtr & statistics_ = {}) { - return Base::create(std::move(variant_column_), variant_info_, max_dynamic_types_, statistics_); + return Base::create(std::move(variant_column_), variant_info_, max_dynamic_types_, global_max_dynamic_types_, statistics_); } - static MutablePtr create(MutableColumnPtr variant_column_, const DataTypePtr & variant_type, size_t max_dynamic_types_, const Statistics & statistics_ = {}); - - static ColumnPtr create(ColumnPtr variant_column_, const DataTypePtr & variant_type, size_t max_dynamic_types_, const Statistics & statistics_ = {}) + static MutablePtr create(MutableColumnPtr variant_column_, const DataTypePtr & variant_type_, size_t max_dynamic_types_, size_t global_max_dynamic_types_, const StatisticsPtr & statistics_ = {}) { - return create(variant_column_->assumeMutable(), variant_type, max_dynamic_types_, statistics_); + return Base::create(std::move(variant_column_), variant_type_, max_dynamic_types_, global_max_dynamic_types_, statistics_); } - static MutablePtr create(size_t max_dynamic_types_) + static ColumnPtr create(ColumnPtr variant_column_, const DataTypePtr & variant_type, size_t max_dynamic_types_, size_t global_max_dynamic_types_, const StatisticsPtr & statistics_ = {}) + { + return create(variant_column_->assumeMutable(), variant_type, max_dynamic_types_, global_max_dynamic_types_, statistics_); + } + + static MutablePtr create(size_t max_dynamic_types_ = MAX_DYNAMIC_TYPES_LIMIT) { return Base::create(max_dynamic_types_); } - std::string getName() const override { return "Dynamic(max_types=" + std::to_string(max_dynamic_types) + ")"; } + std::string getName() const override { return "Dynamic(max_types=" + std::to_string(global_max_dynamic_types) + ")"; } const char * getFamilyName() const override { @@ -98,47 +126,41 @@ public: MutableColumnPtr cloneEmpty() const override { /// Keep current dynamic structure - return Base::create(variant_column->cloneEmpty(), variant_info, max_dynamic_types, statistics); + return Base::create(variant_column->cloneEmpty(), variant_info, max_dynamic_types, global_max_dynamic_types, statistics); } MutableColumnPtr cloneResized(size_t size) const override { - return Base::create(variant_column->cloneResized(size), variant_info, max_dynamic_types, statistics); + return Base::create(variant_column->cloneResized(size), variant_info, max_dynamic_types, global_max_dynamic_types, statistics); } size_t size() const override { - return variant_column->size(); + return variant_column_ptr->size(); } - Field operator[](size_t n) const override - { - return (*variant_column)[n]; - } + Field operator[](size_t n) const override; - void get(size_t n, Field & res) const override - { - variant_column->get(n, res); - } + void get(size_t n, Field & res) const override; bool isDefaultAt(size_t n) const override { - return variant_column->isDefaultAt(n); + return variant_column_ptr->isDefaultAt(n); } bool isNullAt(size_t n) const override { - return variant_column->isNullAt(n); + return variant_column_ptr->isNullAt(n); } StringRef getDataAt(size_t n) const override { - return variant_column->getDataAt(n); + return variant_column_ptr->getDataAt(n); } void insertData(const char * pos, size_t length) override { - variant_column->insertData(pos, length); + variant_column_ptr->insertData(pos, length); } void insert(const Field & x) override; @@ -156,17 +178,17 @@ public: void insertDefault() override { - variant_column->insertDefault(); + variant_column_ptr->insertDefault(); } void insertManyDefaults(size_t length) override { - variant_column->insertManyDefaults(length); + variant_column_ptr->insertManyDefaults(length); } void popBack(size_t n) override { - variant_column->popBack(n); + variant_column_ptr->popBack(n); } StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override; @@ -177,46 +199,46 @@ public: WeakHash32 getWeakHash32() const override { - return variant_column->getWeakHash32(); + return variant_column_ptr->getWeakHash32(); } void updateHashFast(SipHash & hash) const override { - variant_column->updateHashFast(hash); + variant_column_ptr->updateHashFast(hash); } ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override { - return create(variant_column->filter(filt, result_size_hint), variant_info, max_dynamic_types); + return create(variant_column_ptr->filter(filt, result_size_hint), variant_info, max_dynamic_types, global_max_dynamic_types); } void expand(const Filter & mask, bool inverted) override { - variant_column->expand(mask, inverted); + variant_column_ptr->expand(mask, inverted); } ColumnPtr permute(const Permutation & perm, size_t limit) const override { - return create(variant_column->permute(perm, limit), variant_info, max_dynamic_types); + return create(variant_column_ptr->permute(perm, limit), variant_info, max_dynamic_types, global_max_dynamic_types); } ColumnPtr index(const IColumn & indexes, size_t limit) const override { - return create(variant_column->index(indexes, limit), variant_info, max_dynamic_types); + return create(variant_column_ptr->index(indexes, limit), variant_info, max_dynamic_types, global_max_dynamic_types); } ColumnPtr replicate(const Offsets & replicate_offsets) const override { - return create(variant_column->replicate(replicate_offsets), variant_info, max_dynamic_types); + return create(variant_column_ptr->replicate(replicate_offsets), variant_info, max_dynamic_types, global_max_dynamic_types); } MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override { - MutableColumns scattered_variant_columns = variant_column->scatter(num_columns, selector); + MutableColumns scattered_variant_columns = variant_column_ptr->scatter(num_columns, selector); MutableColumns scattered_columns; scattered_columns.reserve(num_columns); for (auto & scattered_variant_column : scattered_variant_columns) - scattered_columns.emplace_back(create(std::move(scattered_variant_column), variant_info, max_dynamic_types)); + scattered_columns.emplace_back(create(std::move(scattered_variant_column), variant_info, max_dynamic_types, global_max_dynamic_types)); return scattered_columns; } @@ -229,71 +251,76 @@ public: bool hasEqualValues() const override { - return variant_column->hasEqualValues(); + return variant_column_ptr->hasEqualValues(); } void getExtremes(Field & min, Field & max) const override { - variant_column->getExtremes(min, max); + variant_column_ptr->getExtremes(min, max); } void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, - size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override - { - variant_column->getPermutation(direction, stability, limit, nan_direction_hint, res); - } + size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override; void updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, - size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const override - { - variant_column->updatePermutation(direction, stability, limit, nan_direction_hint, res, equal_ranges); - } + size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const override; void reserve(size_t n) override { - variant_column->reserve(n); + variant_column_ptr->reserve(n); } + size_t capacity() const override + { + return variant_column_ptr->capacity(); + } + + void prepareForSquashing(const Columns & source_columns) override; + /// Prepare only variants but not discriminators and offsets. + void prepareVariantsForSquashing(const Columns & source_columns); + void ensureOwnership() override { - variant_column->ensureOwnership(); + variant_column_ptr->ensureOwnership(); } size_t byteSize() const override { - return variant_column->byteSize(); + return variant_column_ptr->byteSize(); } size_t byteSizeAt(size_t n) const override { - return variant_column->byteSizeAt(n); + return variant_column_ptr->byteSizeAt(n); } size_t allocatedBytes() const override { - return variant_column->allocatedBytes(); + return variant_column_ptr->allocatedBytes(); } void protect() override { - variant_column->protect(); + variant_column_ptr->protect(); } void forEachSubcolumn(MutableColumnCallback callback) override { callback(variant_column); + variant_column_ptr = assert_cast(variant_column.get()); } void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override { callback(*variant_column); + variant_column_ptr = assert_cast(variant_column.get()); variant_column->forEachSubcolumnRecursively(callback); } bool structureEquals(const IColumn & rhs) const override { if (const auto * rhs_concrete = typeid_cast(&rhs)) - return max_dynamic_types == rhs_concrete->max_dynamic_types; + return global_max_dynamic_types == rhs_concrete->global_max_dynamic_types; return false; } @@ -301,27 +328,27 @@ public: double getRatioOfDefaultRows(double sample_ratio) const override { - return variant_column->getRatioOfDefaultRows(sample_ratio); + return variant_column_ptr->getRatioOfDefaultRows(sample_ratio); } UInt64 getNumberOfDefaultRows() const override { - return variant_column->getNumberOfDefaultRows(); + return variant_column_ptr->getNumberOfDefaultRows(); } void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override { - variant_column->getIndicesOfNonDefaultRows(indices, from, limit); + variant_column_ptr->getIndicesOfNonDefaultRows(indices, from, limit); } void finalize() override { - variant_column->finalize(); + variant_column_ptr->finalize(); } bool isFinalized() const override { - return variant_column->isFinalized(); + return variant_column_ptr->isFinalized(); } /// Apply null map to a nested Variant column. @@ -333,20 +360,79 @@ public: const ColumnPtr & getVariantColumnPtr() const { return variant_column; } ColumnPtr & getVariantColumnPtr() { return variant_column; } - const ColumnVariant & getVariantColumn() const { return assert_cast(*variant_column); } - ColumnVariant & getVariantColumn() { return assert_cast(*variant_column); } + const ColumnVariant & getVariantColumn() const { return *variant_column_ptr; } + ColumnVariant & getVariantColumn() { return *variant_column_ptr; } - bool addNewVariant(const DataTypePtr & new_variant); - void addStringVariant(); + bool addNewVariant(const DataTypePtr & new_variant, const String & new_variant_name); + bool addNewVariant(const DataTypePtr & new_variant) { return addNewVariant(new_variant, new_variant->getName()); } bool hasDynamicStructure() const override { return true; } void takeDynamicStructureFromSourceColumns(const Columns & source_columns) override; - const Statistics & getStatistics() const { return statistics; } + const StatisticsPtr & getStatistics() const { return statistics; } + void setStatistics(const StatisticsPtr & statistics_) { statistics = statistics_; } size_t getMaxDynamicTypes() const { return max_dynamic_types; } + /// Check if we can add new variant types. + /// Shared variant doesn't count in the limit but always presents, + /// so we should subtract 1 from the total types count. + bool canAddNewVariants(size_t current_variants_count, size_t new_variants_count) const { return current_variants_count + new_variants_count - 1 <= max_dynamic_types; } + bool canAddNewVariant(size_t current_variants_count) const { return canAddNewVariants(current_variants_count, 1); } + bool canAddNewVariants(size_t new_variants_count) const { return canAddNewVariants(variant_info.variant_names.size(), new_variants_count); } + bool canAddNewVariant() const { return canAddNewVariants(variant_info.variant_names.size(), 1); } + + void setVariantType(const DataTypePtr & variant_type); + void setMaxDynamicPaths(size_t max_dynamic_type_); + + static const String & getSharedVariantTypeName() + { + static const String name = SHARED_VARIANT_TYPE_NAME; + return name; + } + + static DataTypePtr getSharedVariantDataType(); + + ColumnVariant::Discriminator getSharedVariantDiscriminator() const + { + return variant_info.variant_name_to_discriminator.at(getSharedVariantTypeName()); + } + + ColumnString & getSharedVariant() + { + return assert_cast(getVariantColumn().getVariantByGlobalDiscriminator(getSharedVariantDiscriminator())); + } + + const ColumnString & getSharedVariant() const + { + return assert_cast(getVariantColumn().getVariantByGlobalDiscriminator(getSharedVariantDiscriminator())); + } + + /// Serializes type and value in binary format into provided shared variant. Doesn't update Variant discriminators and offsets. + static void serializeValueIntoSharedVariant(ColumnString & shared_variant, const IColumn & src, const DataTypePtr & type, const SerializationPtr & serialization, size_t n); + + /// Insert value into shared variant. Also updates Variant discriminators and offsets. + void insertValueIntoSharedVariant(const IColumn & src, const DataTypePtr & type, const String & type_name, size_t n); + + const SerializationPtr & getVariantSerialization(const DataTypePtr & variant_type, const String & variant_name) const + { + /// Get serialization for provided data type. + /// To avoid calling type->getDefaultSerialization() every time we use simple cache with max size. + /// When max size is reached, just clear the cache. + if (serialization_cache.size() == SERIALIZATION_CACHE_MAX_SIZE) + serialization_cache.clear(); + + if (auto it = serialization_cache.find(variant_name); it != serialization_cache.end()) + return it->second; + + return serialization_cache.emplace(variant_name, variant_type->getDefaultSerialization()).first->second; + } + + const SerializationPtr & getVariantSerialization(const DataTypePtr & variant_type) const { return getVariantSerialization(variant_type, variant_type->getName()); } + private: + void createVariantInfo(const DataTypePtr & variant_type); + /// Combine current variant with the other variant and return global discriminators mapping /// from other variant to the combined one. It's used for inserting from /// different variants. @@ -356,15 +442,26 @@ private: void updateVariantInfoAndExpandVariantColumn(const DataTypePtr & new_variant_type); WrappedPtr variant_column; + /// Store and use pointer to ColumnVariant to avoid virtual calls. + /// ColumnDynamic is widely used inside ColumnObject for each path and + /// with hundreds of paths these virtual calls are noticeable. + ColumnVariant * variant_column_ptr; /// Store the type of current variant with some additional information. VariantInfo variant_info; /// The maximum number of different types that can be stored in this Dynamic column. - /// If exceeded, all new variants will be converted to String. + /// If exceeded, all new variants will be added to a special shared variant with type String + /// in binary format. This limit can be different for different instances of Dynamic column. + /// When max_dynamic_types = 0, we will have only shared variant and insert all values into it. size_t max_dynamic_types; + /// The types limit specified in the data type by the user Dynamic(max_types=N). + /// max_dynamic_types in all column instances of this Dynamic type can be only smaller + /// (for example, max_dynamic_types can be reduced in takeDynamicStructureFromSourceColumns + /// before merge of different Dynamic columns). + size_t global_max_dynamic_types; /// Size statistics of each variants from MergeTree data part. /// Used in takeDynamicStructureFromSourceColumns and set during deserialization. - Statistics statistics; + StatisticsPtr statistics; /// Cache (Variant name) -> (global discriminators mapping from this variant to current variant in Dynamic column). /// Used to avoid mappings recalculation in combineVariants for the same Variant types. @@ -372,6 +469,17 @@ private: /// Cache of Variant types that couldn't be combined with current variant in Dynamic column. /// Used to avoid checking if combination is possible for the same Variant types. std::unordered_set variants_with_failed_combination; + + /// We can use serializations of different data types to serialize values into shared variant. + /// To avoid creating the same serialization multiple times, use simple cache. + static const size_t SERIALIZATION_CACHE_MAX_SIZE = 256; + mutable std::unordered_map serialization_cache; }; +void extendVariantColumn( + IColumn & variant_column, + const DataTypePtr & old_variant_type, + const DataTypePtr & new_variant_type, + std::unordered_map old_variant_name_to_discriminator); + } diff --git a/src/Columns/ColumnFixedString.cpp b/src/Columns/ColumnFixedString.cpp index 0bb3f7edb14..04e894ee5ab 100644 --- a/src/Columns/ColumnFixedString.cpp +++ b/src/Columns/ColumnFixedString.cpp @@ -59,7 +59,7 @@ bool ColumnFixedString::isDefaultAt(size_t index) const void ColumnFixedString::insert(const Field & x) { - const String & s = x.get(); + const String & s = x.safeGet(); insertData(s.data(), s.size()); } @@ -67,7 +67,7 @@ bool ColumnFixedString::tryInsert(const Field & x) { if (x.getType() != Field::Types::Which::String) return false; - const String & s = x.get(); + const String & s = x.safeGet(); if (s.size() > n) return false; insertData(s.data(), s.size()); diff --git a/src/Columns/ColumnFixedString.h b/src/Columns/ColumnFixedString.h index 676ac7712ba..8cf0a6a57da 100644 --- a/src/Columns/ColumnFixedString.h +++ b/src/Columns/ColumnFixedString.h @@ -182,6 +182,11 @@ public: chars.reserve_exact(n * size); } + size_t capacity() const override + { + return chars.capacity() / n; + } + void shrinkToFit() override { chars.shrink_to_fit(); diff --git a/src/Columns/ColumnLowCardinality.h b/src/Columns/ColumnLowCardinality.h index 3766b247d60..3cc1c8919c0 100644 --- a/src/Columns/ColumnLowCardinality.h +++ b/src/Columns/ColumnLowCardinality.h @@ -46,8 +46,8 @@ public: return Base::create(std::move(column_unique), std::move(indexes), is_shared); } - std::string getName() const override { return "ColumnLowCardinality"; } - const char * getFamilyName() const override { return "ColumnLowCardinality"; } + std::string getName() const override { return "LowCardinality(" + getDictionary().getNestedColumn()->getName() + ")"; } + const char * getFamilyName() const override { return "LowCardinality"; } TypeIndex getDataType() const override { return TypeIndex::LowCardinality; } ColumnPtr convertToFullColumn() const { return getDictionary().getNestedColumn()->index(getIndexes(), 0); } @@ -172,6 +172,7 @@ public: } void reserve(size_t n) override { idx.reserve(n); } + size_t capacity() const override { return idx.capacity(); } void shrinkToFit() override { idx.shrinkToFit(); } /// Don't count the dictionary size as it can be shared between different blocks. @@ -309,6 +310,7 @@ public: void popBack(size_t n) { positions->popBack(n); } void reserve(size_t n) { positions->reserve(n); } + size_t capacity() const { return positions->capacity(); } void shrinkToFit() { positions->shrinkToFit(); } UInt64 getMaxPositionForCurrentType() const; diff --git a/src/Columns/ColumnMap.cpp b/src/Columns/ColumnMap.cpp index 1025b4e77b9..536da4d06d0 100644 --- a/src/Columns/ColumnMap.cpp +++ b/src/Columns/ColumnMap.cpp @@ -72,7 +72,7 @@ void ColumnMap::get(size_t n, Field & res) const size_t size = offsets[n] - offsets[n - 1]; res = Map(); - auto & map = res.get(); + auto & map = res.safeGet(); map.reserve(size); for (size_t i = 0; i < size; ++i) @@ -96,7 +96,7 @@ void ColumnMap::insertData(const char *, size_t) void ColumnMap::insert(const Field & x) { - const auto & map = x.get(); + const auto & map = x.safeGet(); nested->insert(Array(map.begin(), map.end())); } @@ -105,7 +105,7 @@ bool ColumnMap::tryInsert(const Field & x) if (x.getType() != Field::Types::Which::Map) return false; - const auto & map = x.get(); + const auto & map = x.safeGet(); return nested->tryInsert(Array(map.begin(), map.end())); } @@ -249,6 +249,20 @@ void ColumnMap::reserve(size_t n) nested->reserve(n); } +size_t ColumnMap::capacity() const +{ + return nested->capacity(); +} + +void ColumnMap::prepareForSquashing(const Columns & source_columns) +{ + Columns nested_source_columns; + nested_source_columns.reserve(source_columns.size()); + for (const auto & source_column : source_columns) + nested_source_columns.push_back(assert_cast(*source_column).getNestedColumnPtr()); + nested->prepareForSquashing(nested_source_columns); +} + void ColumnMap::shrinkToFit() { nested->shrinkToFit(); @@ -288,8 +302,8 @@ void ColumnMap::getExtremes(Field & min, Field & max) const /// Convert result Array fields to Map fields because client expect min and max field to have type Map - Array nested_min_value = nested_min.get(); - Array nested_max_value = nested_max.get(); + Array nested_min_value = nested_min.safeGet(); + Array nested_max_value = nested_max.safeGet(); Map map_min_value(nested_min_value.begin(), nested_min_value.end()); Map map_max_value(nested_max_value.begin(), nested_max_value.end()); diff --git a/src/Columns/ColumnMap.h b/src/Columns/ColumnMap.h index 3eaaa0ad562..39d15a586b9 100644 --- a/src/Columns/ColumnMap.h +++ b/src/Columns/ColumnMap.h @@ -94,6 +94,8 @@ public: void updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const override; void reserve(size_t n) override; + size_t capacity() const override; + void prepareForSquashing(const Columns & source_columns) override; void shrinkToFit() override; void ensureOwnership() override; size_t byteSize() const override; diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index 6529f0b78db..ec375ea5a8d 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -706,6 +706,27 @@ void ColumnNullable::reserve(size_t n) getNullMapData().reserve(n); } +size_t ColumnNullable::capacity() const +{ + return getNullMapData().capacity(); +} + +void ColumnNullable::prepareForSquashing(const Columns & source_columns) +{ + size_t new_size = size(); + Columns nested_source_columns; + nested_source_columns.reserve(source_columns.size()); + for (const auto & source_column : source_columns) + { + const auto & source_nullable_column = assert_cast(*source_column); + new_size += source_nullable_column.size(); + nested_source_columns.push_back(source_nullable_column.getNestedColumnPtr()); + } + + nested_column->prepareForSquashing(nested_source_columns); + getNullMapData().reserve(new_size); +} + void ColumnNullable::shrinkToFit() { getNestedColumn().shrinkToFit(); diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index fe9f5b6dcc2..78274baca51 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -125,6 +125,8 @@ public: size_t limit, int null_direction_hint, Permutation & res, EqualRanges& equal_ranges) const override; size_t estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const override; void reserve(size_t n) override; + size_t capacity() const override; + void prepareForSquashing(const Columns & source_columns) override; void shrinkToFit() override; void ensureOwnership() override; size_t byteSize() const override; diff --git a/src/Columns/ColumnObject.cpp b/src/Columns/ColumnObject.cpp index a6431007cb6..e397b03b69e 100644 --- a/src/Columns/ColumnObject.cpp +++ b/src/Columns/ColumnObject.cpp @@ -1,766 +1,444 @@ -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - +#include +#include +#include +#include +#include +#include namespace DB { namespace ErrorCodes { - extern const int ARGUMENT_OUT_OF_BOUND; - extern const int DUPLICATE_COLUMN; - extern const int EXPERIMENTAL_FEATURE_ERROR; - extern const int ILLEGAL_COLUMN; - extern const int NUMBER_OF_DIMENSIONS_MISMATCHED; - extern const int SIZES_OF_COLUMNS_DOESNT_MATCH; + extern const int NOT_IMPLEMENTED; + extern const int LOGICAL_ERROR; } namespace { -/// Recreates column with default scalar values and keeps sizes of arrays. -ColumnPtr recreateColumnWithDefaultValues( - const ColumnPtr & column, const DataTypePtr & scalar_type, size_t num_dimensions) +const FormatSettings & getFormatSettings() { - const auto * column_array = checkAndGetColumn(column.get()); - if (column_array && num_dimensions) - { - return ColumnArray::create( - recreateColumnWithDefaultValues( - column_array->getDataPtr(), scalar_type, num_dimensions - 1), - IColumn::mutate(column_array->getOffsetsPtr())); - } - - return createArrayOfType(scalar_type, num_dimensions)->createColumn()->cloneResized(column->size()); + static const FormatSettings settings; + return settings; } -/// Replaces NULL fields to given field or empty array. -class FieldVisitorReplaceNull : public StaticVisitor +const std::shared_ptr & getDynamicSerialization() { -public: - explicit FieldVisitorReplaceNull( - const Field & replacement_, size_t num_dimensions_) - : replacement(replacement_) - , num_dimensions(num_dimensions_) - { - } - - Field operator()(const Null &) const - { - return num_dimensions ? Array() : replacement; - } - - Field operator()(const Array & x) const - { - assert(num_dimensions > 0); - const size_t size = x.size(); - Array res(size); - for (size_t i = 0; i < size; ++i) - res[i] = applyVisitor(FieldVisitorReplaceNull(replacement, num_dimensions - 1), x[i]); - return res; - } - - template - Field operator()(const T & x) const { return x; } - -private: - const Field & replacement; - size_t num_dimensions; -}; - -/// Visitor that allows to get type of scalar field -/// or least common type of scalars in array. -/// More optimized version of FieldToDataType. -class FieldVisitorToScalarType : public StaticVisitor<> -{ -public: - using FieldType = Field::Types::Which; - - void operator()(const Array & x) - { - size_t size = x.size(); - for (size_t i = 0; i < size; ++i) - applyVisitor(*this, x[i]); - } - - void operator()(const UInt64 & x) - { - field_types.insert(FieldType::UInt64); - if (x <= std::numeric_limits::max()) - type_indexes.insert(TypeIndex::UInt8); - else if (x <= std::numeric_limits::max()) - type_indexes.insert(TypeIndex::UInt16); - else if (x <= std::numeric_limits::max()) - type_indexes.insert(TypeIndex::UInt32); - else - type_indexes.insert(TypeIndex::UInt64); - } - - void operator()(const Int64 & x) - { - field_types.insert(FieldType::Int64); - if (x <= std::numeric_limits::max() && x >= std::numeric_limits::min()) - type_indexes.insert(TypeIndex::Int8); - else if (x <= std::numeric_limits::max() && x >= std::numeric_limits::min()) - type_indexes.insert(TypeIndex::Int16); - else if (x <= std::numeric_limits::max() && x >= std::numeric_limits::min()) - type_indexes.insert(TypeIndex::Int32); - else - type_indexes.insert(TypeIndex::Int64); - } - - void operator()(const bool &) - { - field_types.insert(FieldType::UInt64); - type_indexes.insert(TypeIndex::UInt8); - } - - void operator()(const Null &) - { - have_nulls = true; - } - - template - void operator()(const T &) - { - field_types.insert(Field::TypeToEnum>::value); - type_indexes.insert(TypeToTypeIndex>); - } - - DataTypePtr getScalarType() const { return getLeastSupertypeOrString(type_indexes); } - bool haveNulls() const { return have_nulls; } - bool needConvertField() const { return field_types.size() > 1; } - -private: - TypeIndexSet type_indexes; - std::unordered_set field_types; - bool have_nulls = false; -}; + static const std::shared_ptr dynamic_serialization = std::make_shared(); + return dynamic_serialization; +} } -FieldInfo getFieldInfo(const Field & field) +ColumnObject::ColumnObject( + std::unordered_map typed_paths_, + std::unordered_map dynamic_paths_, + MutableColumnPtr shared_data_, + size_t max_dynamic_paths_, + size_t global_max_dynamic_paths_, + size_t max_dynamic_types_, + const StatisticsPtr & statistics_) + : shared_data(std::move(shared_data_)) + , max_dynamic_paths(max_dynamic_paths_) + , global_max_dynamic_paths(global_max_dynamic_paths_) + , max_dynamic_types(max_dynamic_types_) + , statistics(statistics_) { - FieldVisitorToScalarType to_scalar_type_visitor; - applyVisitor(to_scalar_type_visitor, field); - FieldVisitorToNumberOfDimensions to_number_dimension_visitor; + typed_paths.reserve(typed_paths_.size()); + for (auto & [path, column] : typed_paths_) + typed_paths[path] = std::move(column); - return + dynamic_paths.reserve(dynamic_paths_.size()); + dynamic_paths_ptrs.reserve(dynamic_paths_.size()); + for (auto & [path, column] : dynamic_paths_) { - to_scalar_type_visitor.getScalarType(), - to_scalar_type_visitor.haveNulls(), - to_scalar_type_visitor.needConvertField(), - applyVisitor(to_number_dimension_visitor, field), - to_number_dimension_visitor.need_fold_dimension - }; -} - -ColumnObject::Subcolumn::Subcolumn(MutableColumnPtr && data_, bool is_nullable_) - : least_common_type(getDataTypeByColumn(*data_)) - , is_nullable(is_nullable_) - , num_rows(data_->size()) -{ - data.push_back(std::move(data_)); -} - -ColumnObject::Subcolumn::Subcolumn( - size_t size_, bool is_nullable_) - : least_common_type(std::make_shared()) - , is_nullable(is_nullable_) - , num_of_defaults_in_prefix(size_) - , num_rows(size_) -{ -} - -size_t ColumnObject::Subcolumn::size() const -{ - return num_rows; -} - -size_t ColumnObject::Subcolumn::byteSize() const -{ - size_t res = 0; - for (const auto & part : data) - res += part->byteSize(); - return res; -} - -size_t ColumnObject::Subcolumn::allocatedBytes() const -{ - size_t res = 0; - for (const auto & part : data) - res += part->allocatedBytes(); - return res; -} - -void ColumnObject::Subcolumn::get(size_t n, Field & res) const -{ - if (isFinalized()) - { - getFinalizedColumn().get(n, res); - return; - } - - size_t ind = n; - if (ind < num_of_defaults_in_prefix) - { - res = least_common_type.get()->getDefault(); - return; - } - - ind -= num_of_defaults_in_prefix; - for (const auto & part : data) - { - if (ind < part->size()) - { - part->get(ind, res); - res = convertFieldToTypeOrThrow(res, *least_common_type.get()); - return; - } - - ind -= part->size(); - } - - throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Index ({}) for getting field is out of range", n); -} - -void ColumnObject::Subcolumn::checkTypes() const -{ - DataTypes prefix_types; - prefix_types.reserve(data.size()); - for (size_t i = 0; i < data.size(); ++i) - { - auto current_type = getDataTypeByColumn(*data[i]); - prefix_types.push_back(current_type); - auto prefix_common_type = getLeastSupertype(prefix_types); - if (!prefix_common_type->equals(*current_type)) - throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, - "Data type {} of column at position {} cannot represent all columns from i-th prefix", - current_type->getName(), i); + dynamic_paths[path] = std::move(column); + dynamic_paths_ptrs[path] = assert_cast(dynamic_paths[path].get()); } } -void ColumnObject::Subcolumn::insert(Field field) +ColumnObject::ColumnObject( + std::unordered_map typed_paths_, size_t max_dynamic_paths_, size_t max_dynamic_types_) + : max_dynamic_paths(max_dynamic_paths_), global_max_dynamic_paths(max_dynamic_paths_), max_dynamic_types(max_dynamic_types_) { - auto info = DB::getFieldInfo(field); - insert(std::move(field), std::move(info)); -} - -void ColumnObject::Subcolumn::addNewColumnPart(DataTypePtr type) -{ - auto serialization = type->getSerialization(ISerialization::Kind::SPARSE); - data.push_back(type->createColumn(*serialization)); - least_common_type = LeastCommonType{std::move(type)}; -} - -static bool isConversionRequiredBetweenIntegers(const IDataType & lhs, const IDataType & rhs) -{ - /// If both of types are signed/unsigned integers and size of left field type - /// is less than right type, we don't need to convert field, - /// because all integer fields are stored in Int64/UInt64. - - WhichDataType which_lhs(lhs); - WhichDataType which_rhs(rhs); - - bool is_native_int = which_lhs.isNativeInt() && which_rhs.isNativeInt(); - bool is_native_uint = which_lhs.isNativeUInt() && which_rhs.isNativeUInt(); - - return (!is_native_int && !is_native_uint) - || lhs.getSizeOfValueInMemory() > rhs.getSizeOfValueInMemory(); -} - -void ColumnObject::Subcolumn::insert(Field field, FieldInfo info) -{ - auto base_type = std::move(info.scalar_type); - - if (isNothing(base_type) && info.num_dimensions == 0) + typed_paths.reserve(typed_paths_.size()); + for (auto & [path, column] : typed_paths_) { - insertDefault(); - return; + if (!column->empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected non-empty typed path column in ColumnObject constructor"); + typed_paths[path] = std::move(column); } - auto column_dim = least_common_type.getNumberOfDimensions(); - auto value_dim = info.num_dimensions; - - if (isNothing(least_common_type.get())) - column_dim = value_dim; - - if (isNothing(base_type)) - value_dim = column_dim; - - if (value_dim != column_dim) - throw Exception(ErrorCodes::NUMBER_OF_DIMENSIONS_MISMATCHED, - "Dimension of types mismatched between inserted value and column. " - "Dimension of value: {}. Dimension of column: {}", - value_dim, column_dim); - - if (is_nullable) - base_type = makeNullable(base_type); - - if (!is_nullable && info.have_nulls) - field = applyVisitor(FieldVisitorReplaceNull(base_type->getDefault(), value_dim), std::move(field)); - - bool type_changed = false; - const auto & least_common_base_type = least_common_type.getBase(); - - if (data.empty()) - { - addNewColumnPart(createArrayOfType(std::move(base_type), value_dim)); - } - else if (!least_common_base_type->equals(*base_type) && !isNothing(base_type)) - { - if (isConversionRequiredBetweenIntegers(*base_type, *least_common_base_type)) - { - base_type = getLeastSupertypeOrString(DataTypes{std::move(base_type), least_common_base_type}); - type_changed = true; - if (!least_common_base_type->equals(*base_type)) - addNewColumnPart(createArrayOfType(std::move(base_type), value_dim)); - } - } - - if (type_changed || info.need_convert) - field = convertFieldToTypeOrThrow(field, *least_common_type.get()); - - if (!data.back()->tryInsert(field)) - { - /** Normalization of the field above is pretty complicated (it uses several FieldVisitors), - * so in the case of a bug, we may get mismatched types. - * The `IColumn::insert` method does not check the type of the inserted field, and it can lead to a segmentation fault. - * Therefore, we use the safer `tryInsert` method to get an exception instead of a segmentation fault. - */ - throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, - "Cannot insert field {} to column {}", - field.dump(), data.back()->dumpStructure()); - } - - ++num_rows; + MutableColumns paths_and_values; + paths_and_values.emplace_back(ColumnString::create()); + paths_and_values.emplace_back(ColumnString::create()); + shared_data = ColumnArray::create(ColumnTuple::create(std::move(paths_and_values))); } -void ColumnObject::Subcolumn::insertRangeFrom(const Subcolumn & src, size_t start, size_t length) +ColumnObject::Ptr ColumnObject::create( + const std::unordered_map & typed_paths_, + const std::unordered_map & dynamic_paths_, + const ColumnPtr & shared_data_, + size_t max_dynamic_paths_, + size_t global_max_dynamic_paths_, + size_t max_dynamic_types_, + const ColumnObject::StatisticsPtr & statistics_) { - assert(start + length <= src.size()); - size_t end = start + length; - num_rows += length; + std::unordered_map mutable_typed_paths; + mutable_typed_paths.reserve(typed_paths_.size()); + for (const auto & [path, column] : typed_paths_) + mutable_typed_paths[path] = typed_paths_.at(path)->assumeMutable(); - if (data.empty()) - { - addNewColumnPart(src.getLeastCommonType()); - } - else if (!least_common_type.get()->equals(*src.getLeastCommonType())) - { - auto new_least_common_type = getLeastSupertypeOrString(DataTypes{least_common_type.get(), src.getLeastCommonType()}); - if (!new_least_common_type->equals(*least_common_type.get())) - addNewColumnPart(std::move(new_least_common_type)); - } + std::unordered_map mutable_dynamic_paths; + mutable_dynamic_paths.reserve(dynamic_paths_.size()); + for (const auto & [path, column] : dynamic_paths_) + mutable_dynamic_paths[path] = dynamic_paths_.at(path)->assumeMutable(); - if (end <= src.num_of_defaults_in_prefix) - { - data.back()->insertManyDefaults(length); - return; - } - - if (start < src.num_of_defaults_in_prefix) - data.back()->insertManyDefaults(src.num_of_defaults_in_prefix - start); - - auto insert_from_part = [&](const auto & column, size_t from, size_t n) - { - assert(from + n <= column->size()); - auto column_type = getDataTypeByColumn(*column); - - if (column_type->equals(*least_common_type.get())) - { - data.back()->insertRangeFrom(*column, from, n); - return; - } - - /// If we need to insert large range, there is no sense to cut part of column and cast it. - /// Casting of all column and inserting from it can be faster. - /// Threshold is just a guess. - - if (n * 3 >= column->size()) - { - auto casted_column = castColumn({column, column_type, ""}, least_common_type.get()); - data.back()->insertRangeFrom(*casted_column, from, n); - return; - } - - auto casted_column = column->cut(from, n); - casted_column = castColumn({casted_column, column_type, ""}, least_common_type.get()); - data.back()->insertRangeFrom(*casted_column, 0, n); - }; - - size_t pos = 0; - size_t processed_rows = src.num_of_defaults_in_prefix; - - /// Find the first part of the column that intersects the range. - while (pos < src.data.size() && processed_rows + src.data[pos]->size() < start) - { - processed_rows += src.data[pos]->size(); - ++pos; - } - - /// Insert from the first part of column. - if (pos < src.data.size() && processed_rows < start) - { - size_t part_start = start - processed_rows; - size_t part_length = std::min(src.data[pos]->size() - part_start, end - start); - insert_from_part(src.data[pos], part_start, part_length); - processed_rows += src.data[pos]->size(); - ++pos; - } - - /// Insert from the parts of column in the middle of range. - while (pos < src.data.size() && processed_rows + src.data[pos]->size() < end) - { - insert_from_part(src.data[pos], 0, src.data[pos]->size()); - processed_rows += src.data[pos]->size(); - ++pos; - } - - /// Insert from the last part of column if needed. - if (pos < src.data.size() && processed_rows < end) - { - size_t part_end = end - processed_rows; - insert_from_part(src.data[pos], 0, part_end); - } + return ColumnObject::create( + std::move(mutable_typed_paths), + std::move(mutable_dynamic_paths), + shared_data_->assumeMutable(), + max_dynamic_paths_, + global_max_dynamic_paths_, + max_dynamic_types_, + statistics_); } -bool ColumnObject::Subcolumn::isFinalized() const +ColumnObject::MutablePtr ColumnObject::create( + std::unordered_map typed_paths_, + std::unordered_map dynamic_paths_, + MutableColumnPtr shared_data_, + size_t max_dynamic_paths_, + size_t global_max_dynamic_paths_, + size_t max_dynamic_types_, + const ColumnObject::StatisticsPtr & statistics_) { - return num_of_defaults_in_prefix == 0 && - (data.empty() || (data.size() == 1 && !data[0]->isSparse())); + return Base::create(std::move(typed_paths_), std::move(dynamic_paths_), std::move(shared_data_), max_dynamic_paths_, global_max_dynamic_paths_, max_dynamic_types_, statistics_); } -void ColumnObject::Subcolumn::finalize() +ColumnObject::MutablePtr ColumnObject::create(std::unordered_map typed_paths_, size_t max_dynamic_paths_, size_t max_dynamic_types_) { - if (isFinalized()) - return; - - if (data.size() == 1 && num_of_defaults_in_prefix == 0) - { - data[0] = data[0]->convertToFullColumnIfSparse(); - return; - } - - const auto & to_type = least_common_type.get(); - auto result_column = to_type->createColumn(); - - if (num_of_defaults_in_prefix) - result_column->insertManyDefaults(num_of_defaults_in_prefix); - - for (auto & part : data) - { - part = part->convertToFullColumnIfSparse(); - auto from_type = getDataTypeByColumn(*part); - size_t part_size = part->size(); - - if (!from_type->equals(*to_type)) - { - auto offsets = ColumnUInt64::create(); - auto & offsets_data = offsets->getData(); - - /// We need to convert only non-default values and then recreate column - /// with default value of new type, because default values (which represents misses in data) - /// may be inconsistent between types (e.g "0" in UInt64 and empty string in String). - - part->getIndicesOfNonDefaultRows(offsets_data, 0, part_size); - - if (offsets->size() == part_size) - { - part = castColumn({part, from_type, ""}, to_type); - } - else - { - auto values = part->index(*offsets, offsets->size()); - values = castColumn({values, from_type, ""}, to_type); - part = values->createWithOffsets(offsets_data, *createColumnConstWithDefaultValue(result_column->getPtr()), part_size, /*shift=*/ 0); - } - } - - result_column->insertRangeFrom(*part, 0, part_size); - } - - data = { std::move(result_column) }; - num_of_defaults_in_prefix = 0; + return Base::create(std::move(typed_paths_), max_dynamic_paths_, max_dynamic_types_); } -void ColumnObject::Subcolumn::insertDefault() +std::string ColumnObject::getName() const { - if (data.empty()) - ++num_of_defaults_in_prefix; - else - data.back()->insertDefault(); - - ++num_rows; + WriteBufferFromOwnString ss; + ss << "Object("; + ss << "max_dynamic_paths=" << global_max_dynamic_paths; + ss << ", max_dynamic_types=" << max_dynamic_types; + std::vector sorted_typed_paths; + sorted_typed_paths.reserve(typed_paths.size()); + for (const auto & [path, column] : typed_paths) + sorted_typed_paths.push_back(path); + std::sort(sorted_typed_paths.begin(), sorted_typed_paths.end()); + for (const auto & path : sorted_typed_paths) + ss << ", " << path << " " << typed_paths.at(path)->getName(); + ss << ")"; + return ss.str(); } -void ColumnObject::Subcolumn::insertManyDefaults(size_t length) +MutableColumnPtr ColumnObject::cloneEmpty() const { - if (data.empty()) - num_of_defaults_in_prefix += length; - else - data.back()->insertManyDefaults(length); + std::unordered_map empty_typed_paths; + empty_typed_paths.reserve(typed_paths.size()); + for (const auto & [path, column] : typed_paths) + empty_typed_paths[path] = column->cloneEmpty(); - num_rows += length; + std::unordered_map empty_dynamic_paths; + empty_dynamic_paths.reserve(dynamic_paths.size()); + for (const auto & [path, column] : dynamic_paths) + empty_dynamic_paths[path] = column->cloneEmpty(); + + return ColumnObject::create( + std::move(empty_typed_paths), + std::move(empty_dynamic_paths), + shared_data->cloneEmpty(), + max_dynamic_paths, + global_max_dynamic_paths, + max_dynamic_types, + statistics); } -void ColumnObject::Subcolumn::popBack(size_t n) +MutableColumnPtr ColumnObject::cloneResized(size_t size) const { - assert(n <= size()); + std::unordered_map resized_typed_paths; + resized_typed_paths.reserve(typed_paths.size()); + for (const auto & [path, column] : typed_paths) + resized_typed_paths[path] = column->cloneResized(size); - num_rows -= n; - size_t num_removed = 0; - for (auto it = data.rbegin(); it != data.rend(); ++it) - { - if (n == 0) - break; + std::unordered_map resized_dynamic_paths; + resized_dynamic_paths.reserve(dynamic_paths.size()); + for (const auto & [path, column] : dynamic_paths) + resized_dynamic_paths[path] = column->cloneResized(size); - auto & column = *it; - if (n < column->size()) - { - column->popBack(n); - n = 0; - } - else - { - ++num_removed; - n -= column->size(); - } - } - - data.resize(data.size() - num_removed); - num_of_defaults_in_prefix -= n; -} - -ColumnObject::Subcolumn ColumnObject::Subcolumn::cut(size_t start, size_t length) const -{ - Subcolumn new_subcolumn(0, is_nullable); - new_subcolumn.insertRangeFrom(*this, start, length); - return new_subcolumn; -} - -Field ColumnObject::Subcolumn::getLastField() const -{ - if (data.empty()) - return Field(); - - const auto & last_part = data.back(); - assert(!last_part->empty()); - return (*last_part)[last_part->size() - 1]; -} - -FieldInfo ColumnObject::Subcolumn::getFieldInfo() const -{ - const auto & base_type = least_common_type.getBase(); - return FieldInfo - { - .scalar_type = base_type, - .have_nulls = base_type->isNullable(), - .need_convert = false, - .num_dimensions = least_common_type.getNumberOfDimensions(), - .need_fold_dimension = false, - }; -} - -ColumnObject::Subcolumn ColumnObject::Subcolumn::recreateWithDefaultValues(const FieldInfo & field_info) const -{ - auto scalar_type = field_info.scalar_type; - if (is_nullable) - scalar_type = makeNullable(scalar_type); - - Subcolumn new_subcolumn(*this); - new_subcolumn.least_common_type = LeastCommonType{createArrayOfType(scalar_type, field_info.num_dimensions)}; - - for (auto & part : new_subcolumn.data) - part = recreateColumnWithDefaultValues(part, scalar_type, field_info.num_dimensions); - - return new_subcolumn; -} - -IColumn & ColumnObject::Subcolumn::getFinalizedColumn() -{ - assert(isFinalized()); - return *data[0]; -} - -const IColumn & ColumnObject::Subcolumn::getFinalizedColumn() const -{ - assert(isFinalized()); - return *data[0]; -} - -const ColumnPtr & ColumnObject::Subcolumn::getFinalizedColumnPtr() const -{ - assert(isFinalized()); - return data[0]; -} - -ColumnObject::Subcolumn::LeastCommonType::LeastCommonType() - : type(std::make_shared()) - , base_type(type) - , num_dimensions(0) -{ -} - -ColumnObject::Subcolumn::LeastCommonType::LeastCommonType(DataTypePtr type_) - : type(std::move(type_)) - , base_type(getBaseTypeOfArray(type)) - , num_dimensions(DB::getNumberOfDimensions(*type)) -{ -} - -ColumnObject::ColumnObject(bool is_nullable_) - : is_nullable(is_nullable_) - , num_rows(0) -{ -} - -ColumnObject::ColumnObject(Subcolumns && subcolumns_, bool is_nullable_) - : is_nullable(is_nullable_) - , subcolumns(std::move(subcolumns_)) - , num_rows(subcolumns.empty() ? 0 : (*subcolumns.begin())->data.size()) - -{ - checkConsistency(); -} - -void ColumnObject::checkConsistency() const -{ - if (subcolumns.empty()) - return; - - for (const auto & leaf : subcolumns) - { - if (num_rows != leaf->data.size()) - { - throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, "Sizes of subcolumns are inconsistent in ColumnObject." - " Subcolumn '{}' has {} rows, but expected size is {}", - leaf->path.getPath(), leaf->data.size(), num_rows); - } - } -} - -size_t ColumnObject::size() const -{ -#ifndef NDEBUG - checkConsistency(); -#endif - return num_rows; -} - -size_t ColumnObject::byteSize() const -{ - size_t res = 0; - for (const auto & entry : subcolumns) - res += entry->data.byteSize(); - return res; -} - -size_t ColumnObject::allocatedBytes() const -{ - size_t res = 0; - for (const auto & entry : subcolumns) - res += entry->data.allocatedBytes(); - return res; -} - -void ColumnObject::forEachSubcolumn(MutableColumnCallback callback) -{ - for (auto & entry : subcolumns) - for (auto & part : entry->data.data) - callback(part); -} - -void ColumnObject::forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) -{ - for (auto & entry : subcolumns) - { - for (auto & part : entry->data.data) - { - callback(*part); - part->forEachSubcolumnRecursively(callback); - } - } -} - -void ColumnObject::insert(const Field & field) -{ - const auto & object = field.get(); - - HashSet inserted_paths; - size_t old_size = size(); - for (const auto & [key_str, value] : object) - { - PathInData key(key_str); - inserted_paths.insert(key_str); - if (!hasSubcolumn(key)) - addSubcolumn(key, old_size); - - auto & subcolumn = getSubcolumn(key); - subcolumn.insert(value); - } - - for (auto & entry : subcolumns) - { - if (!inserted_paths.has(entry->path.getPath())) - { - bool inserted = tryInsertDefaultFromNested(entry); - if (!inserted) - entry->data.insertDefault(); - } - } - - ++num_rows; -} - -bool ColumnObject::tryInsert(const Field & field) -{ - if (field.getType() != Field::Types::Which::Object) - return false; - - insert(field); - return true; -} - -void ColumnObject::insertDefault() -{ - for (auto & entry : subcolumns) - entry->data.insertDefault(); - - ++num_rows; + return ColumnObject::create( + std::move(resized_typed_paths), + std::move(resized_dynamic_paths), + shared_data->cloneResized(size), + max_dynamic_paths, + global_max_dynamic_paths, + max_dynamic_types, + statistics); } Field ColumnObject::operator[](size_t n) const { - Field object; - get(n, object); + Object object; + + for (const auto & [path, column] : typed_paths) + object[path] = (*column)[n]; + for (const auto & [path, column] : dynamic_paths_ptrs) + { + /// Output only non-null values from dynamic paths. We cannot distinguish cases when + /// dynamic path has Null value and when it's absent in the row and consider them equivalent. + if (!column->isNullAt(n)) + object[path] = (*column)[n]; + } + + const auto & shared_data_offsets = getSharedDataOffsets(); + const auto [shared_paths, shared_values] = getSharedDataPathsAndValues(); + size_t start = shared_data_offsets[static_cast(n) - 1]; + size_t end = shared_data_offsets[n]; + for (size_t i = start; i != end; ++i) + { + String path = shared_paths->getDataAt(i).toString(); + auto value_data = shared_values->getDataAt(i); + ReadBufferFromMemory buf(value_data.data, value_data.size); + Field value; + getDynamicSerialization()->deserializeBinary(value, buf, getFormatSettings()); + object[path] = value; + } + return object; } void ColumnObject::get(size_t n, Field & res) const { - assert(n < size()); - res = Object(); - auto & object = res.get(); + res = (*this)[n]; +} - for (const auto & entry : subcolumns) +bool ColumnObject::isDefaultAt(size_t n) const +{ + for (const auto & [path, column] : typed_paths) { - auto it = object.try_emplace(entry->path.getPath()).first; - entry->data.get(n, it->second); + if (!column->isDefaultAt(n)) + return false; } + + for (const auto & [path, column] : dynamic_paths_ptrs) + { + if (!column->isDefaultAt(n)) + return false; + } + + if (!shared_data->isDefaultAt(n)) + return false; + + return true; +} + +StringRef ColumnObject::getDataAt(size_t) const +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getDataAt is not supported for {}", getName()); +} + +void ColumnObject::insertData(const char *, size_t) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method insertData is not supported for {}", getName()); +} + +ColumnDynamic * ColumnObject::tryToAddNewDynamicPath(std::string_view path) +{ + if (dynamic_paths.size() == max_dynamic_paths) + return nullptr; + + auto new_dynamic_column = ColumnDynamic::create(max_dynamic_types); + new_dynamic_column->reserve(shared_data->capacity()); + new_dynamic_column->insertManyDefaults(size()); + auto it = dynamic_paths.emplace(path, std::move(new_dynamic_column)).first; + auto it_ptr = dynamic_paths_ptrs.emplace(path, assert_cast(it->second.get())).first; + return it_ptr->second; +} + +void ColumnObject::addNewDynamicPath(std::string_view path) +{ + if (!tryToAddNewDynamicPath(path)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add new dynamic path as the limit ({}) on dynamic paths is reached", max_dynamic_paths); +} + +void ColumnObject::setMaxDynamicPaths(size_t max_dynamic_paths_) +{ + if (!empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Setting specific max_dynamic_paths parameter is allowed only for empty object column"); + + max_dynamic_paths = max_dynamic_paths_; +} + +void ColumnObject::setDynamicPaths(const std::vector & paths) +{ + if (paths.size() > max_dynamic_paths) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot set dynamic paths to Object column, the number of paths ({}) exceeds the limit ({})", paths.size(), max_dynamic_paths); + + size_t size = this->size(); + for (const auto & path : paths) + { + auto new_dynamic_column = ColumnDynamic::create(max_dynamic_types); + if (size) + new_dynamic_column->insertManyDefaults(size); + dynamic_paths[path] = std::move(new_dynamic_column); + dynamic_paths_ptrs[path] = assert_cast(dynamic_paths[path].get()); + } +} + +void ColumnObject::insert(const Field & x) +{ + const auto & object = x.safeGet(); + auto & shared_data_offsets = getSharedDataOffsets(); + auto [shared_data_paths, shared_data_values] = getSharedDataPathsAndValues(); + size_t current_size = size(); + for (const auto & [path, value_field] : object) + { + if (auto typed_it = typed_paths.find(path); typed_it != typed_paths.end()) + { + typed_it->second->insert(value_field); + } + else if (auto dynamic_it = dynamic_paths_ptrs.find(path); dynamic_it != dynamic_paths_ptrs.end()) + { + dynamic_it->second->insert(value_field); + } + else if (auto * dynamic_path_column = tryToAddNewDynamicPath(path)) + { + dynamic_path_column->insert(value_field); + } + /// We reached the limit on dynamic paths. Add this path to the common data if the value is not Null. + /// (we cannot distinguish cases when path has Null value or is absent in the row and consider them equivalent). + /// Object is actually std::map, so all paths are already sorted and we can add it right now. + else if (!value_field.isNull()) + { + shared_data_paths->insertData(path.data(), path.size()); + auto & shared_data_values_chars = shared_data_values->getChars(); + WriteBufferFromVector value_buf(shared_data_values_chars, AppendModeTag()); + getDynamicSerialization()->serializeBinary(value_field, value_buf, getFormatSettings()); + value_buf.finalize(); + shared_data_values_chars.push_back(0); + shared_data_values->getOffsets().push_back(shared_data_values_chars.size()); + } + } + + shared_data_offsets.push_back(shared_data_paths->size()); + + /// Fill all remaining typed and dynamic paths with default values. + for (auto & [_, column] : typed_paths) + { + if (column->size() == current_size) + column->insertDefault(); + } + + for (auto & [_, column] : dynamic_paths_ptrs) + { + if (column->size() == current_size) + column->insertDefault(); + } +} + +bool ColumnObject::tryInsert(const Field & x) +{ + if (x.getType() != Field::Types::Which::Object) + return false; + + const auto & object = x.safeGet(); + auto & shared_data_offsets = getSharedDataOffsets(); + auto [shared_data_paths, shared_data_values] = getSharedDataPathsAndValues(); + size_t prev_size = size(); + size_t prev_paths_size = shared_data_paths->size(); + size_t prev_values_size = shared_data_values->size(); + /// Save all newly added dynamic paths. In case of failure + /// we should remove them. + std::unordered_set new_dynamic_paths; + auto restore_sizes = [&]() + { + for (auto & [_, column] : typed_paths) + { + if (column->size() != prev_size) + column->popBack(column->size() - prev_size); + } + + /// Remove all newly added dynamic paths. + for (const auto & path : new_dynamic_paths) + { + dynamic_paths_ptrs.erase(path); + dynamic_paths.erase(path); + } + + for (auto & [_, column] : dynamic_paths_ptrs) + { + if (column->size() != prev_size) + column->popBack(column->size() - prev_size); + } + + if (shared_data_paths->size() != prev_paths_size) + shared_data_paths->popBack(shared_data_paths->size() - prev_paths_size); + if (shared_data_values->size() != prev_values_size) + shared_data_values->popBack(shared_data_values->size() - prev_values_size); + }; + + for (const auto & [path, value_field] : object) + { + if (auto typed_it = typed_paths.find(path); typed_it != typed_paths.end()) + { + if (!typed_it->second->tryInsert(value_field)) + { + restore_sizes(); + return false; + } + } + else if (auto dynamic_it = dynamic_paths_ptrs.find(path); dynamic_it != dynamic_paths_ptrs.end()) + { + if (!dynamic_it->second->tryInsert(value_field)) + { + restore_sizes(); + return false; + } + } + else if (auto * dynamic_path_column = tryToAddNewDynamicPath(path)) + { + if (!dynamic_path_column->tryInsert(value_field)) + { + restore_sizes(); + return false; + } + } + /// We reached the limit on dynamic paths. Add this path to the common data if the value is not Null. + /// (we cannot distinguish cases when path has Null value or is absent in the row and consider them equivalent). + /// Object is actually std::map, so all paths are already sorted and we can add it right now. + else if (!value_field.isNull()) + { + WriteBufferFromOwnString value_buf; + getDynamicSerialization()->serializeBinary(value_field, value_buf, getFormatSettings()); + shared_data_paths->insertData(path.data(), path.size()); + shared_data_values->insertData(value_buf.str().data(), value_buf.str().size()); + } + } + + shared_data_offsets.push_back(shared_data_paths->size()); + + /// Fill all remaining typed and dynamic paths with default values. + for (auto & [_, column] : typed_paths) + { + if (column->size() == prev_size) + column->insertDefault(); + } + + for (auto & [_, column] : dynamic_paths_ptrs) + { + if (column->size() == prev_size) + column->insertDefault(); + } + + return true; } #if !defined(DEBUG_OR_SANITIZER_BUILD) @@ -769,7 +447,31 @@ void ColumnObject::insertFrom(const IColumn & src, size_t n) void ColumnObject::doInsertFrom(const IColumn & src, size_t n) #endif { - insert(src[n]); + const auto & src_object_column = assert_cast(src); + + /// First, insert typed paths, they must be the same for both columns. + for (const auto & [path, column] : src_object_column.typed_paths) + typed_paths[path]->insertFrom(*column, n); + + /// Second, insert dynamic paths and extend them if needed. + /// We can reach the limit of dynamic paths, and in this case + /// the rest of dynamic paths will be inserted into shared data. + std::vector src_dynamic_paths_for_shared_data; + for (const auto & [path, column] : src_object_column.dynamic_paths) + { + /// Check if we already have such dynamic path. + if (auto it = dynamic_paths_ptrs.find(path); it != dynamic_paths_ptrs.end()) + it->second->insertFrom(*column, n); + /// Try to add a new dynamic path. + else if (auto * dynamic_path_column = tryToAddNewDynamicPath(path)) + dynamic_path_column->insertFrom(*column, n); + /// Limit on dynamic paths is reached, add path to shared data later. + else + src_dynamic_paths_for_shared_data.push_back(path); + } + + /// Finally, insert paths from shared data. + insertFromSharedDataAndFillRemainingDynamicPaths(src_object_column, std::move(src_dynamic_paths_for_shared_data), n, 1); } #if !defined(DEBUG_OR_SANITIZER_BUILD) @@ -778,101 +480,659 @@ void ColumnObject::insertRangeFrom(const IColumn & src, size_t start, size_t len void ColumnObject::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) #endif { - const auto & src_object = assert_cast(src); + /// TODO: try to parallelize doInsertRangeFrom over typed/dynamic paths if it makes sense. + const auto & src_object_column = assert_cast(src); - for (const auto & entry : src_object.subcolumns) + /// First, insert typed paths, they must be the same for both columns. + for (const auto & [path, column] : src_object_column.typed_paths) + typed_paths[path]->insertRangeFrom(*column, start, length); + + /// Second, insert dynamic paths and extend them if needed. + /// We can reach the limit of dynamic paths, and in this case + /// the rest of dynamic paths will be inserted into shared data. + std::vector src_dynamic_paths_for_shared_data; + for (const auto & [path, column] : src_object_column.dynamic_paths) { - if (!hasSubcolumn(entry->path)) + /// Check if we already have such dynamic path. + if (auto it = dynamic_paths_ptrs.find(path); it != dynamic_paths_ptrs.end()) + it->second->insertRangeFrom(*column, start, length); + /// Try to add a new dynamic path. + else if (auto * dynamic_path_column = tryToAddNewDynamicPath(path)) + dynamic_path_column->insertRangeFrom(*column, start, length); + /// Limit on dynamic paths is reached, add path to shared data later. + else + src_dynamic_paths_for_shared_data.push_back(path); + } + + /// Finally, insert paths from shared data. + insertFromSharedDataAndFillRemainingDynamicPaths(src_object_column, std::move(src_dynamic_paths_for_shared_data), start, length); +} + +void ColumnObject::insertFromSharedDataAndFillRemainingDynamicPaths(const DB::ColumnObject & src_object_column, std::vector && src_dynamic_paths_for_shared_data, size_t start, size_t length) +{ + /// Paths in shared data are sorted, so paths from src_dynamic_paths_for_shared_data should be inserted properly + /// to keep paths sorted. Let's sort them in advance. + std::sort(src_dynamic_paths_for_shared_data.begin(), src_dynamic_paths_for_shared_data.end()); + + /// Check if src object doesn't have any paths in shared data in specified range. + const auto & src_shared_data_offsets = src_object_column.getSharedDataOffsets(); + if (src_shared_data_offsets[static_cast(start) - 1] == src_shared_data_offsets[static_cast(start) + length - 1]) + { + size_t current_size = size(); + + /// If no src dynamic columns should be inserted into shared data, insert defaults. + if (src_dynamic_paths_for_shared_data.empty()) { - if (entry->path.hasNested()) - addNestedSubcolumn(entry->path, entry->data.getFieldInfo(), num_rows); + shared_data->insertManyDefaults(length); + } + /// Otherwise insert required src dynamic columns into shared data. + else + { + const auto [shared_data_paths, shared_data_values] = getSharedDataPathsAndValues(); + auto & shared_data_offsets = getSharedDataOffsets(); + for (size_t i = start; i != start + length; ++i) + { + /// Paths in src_dynamic_paths_for_shared_data are already sorted. + for (const auto path : src_dynamic_paths_for_shared_data) + serializePathAndValueIntoSharedData(shared_data_paths, shared_data_values, path, *src_object_column.dynamic_paths.find(path)->second, i); + shared_data_offsets.push_back(shared_data_paths->size()); + } + } + + /// Insert default values in all remaining dynamic paths. + for (auto & [_, column] : dynamic_paths_ptrs) + { + if (column->size() == current_size) + column->insertManyDefaults(length); + } + return; + } + + /// Src object column contains some paths in shared data in specified range. + /// Iterate over this range and insert all required paths into shared data or dynamic paths. + const auto [src_shared_data_paths, src_shared_data_values] = src_object_column.getSharedDataPathsAndValues(); + const auto [shared_data_paths, shared_data_values] = getSharedDataPathsAndValues(); + auto & shared_data_offsets = getSharedDataOffsets(); + for (size_t row = start; row != start + length; ++row) + { + size_t current_size = shared_data_offsets.size(); + /// Use separate index to iterate over sorted src_dynamic_paths_for_shared_data. + size_t src_dynamic_paths_for_shared_data_index = 0; + size_t offset = src_shared_data_offsets[static_cast(row) - 1]; + size_t end = src_shared_data_offsets[row]; + for (size_t i = offset; i != end; ++i) + { + auto path = src_shared_data_paths->getDataAt(i).toView(); + /// Check if we have this path in dynamic paths. + if (auto it = dynamic_paths_ptrs.find(path); it != dynamic_paths_ptrs.end()) + { + /// Deserialize binary value into dynamic column from shared data. + deserializeValueFromSharedData(src_shared_data_values, i, *it->second); + } else - addSubcolumn(entry->path, num_rows); + { + /// Before inserting this path into shared data check if we need to + /// insert dynamic paths from src_dynamic_paths_for_shared_data before. + while (src_dynamic_paths_for_shared_data_index < src_dynamic_paths_for_shared_data.size() + && src_dynamic_paths_for_shared_data[src_dynamic_paths_for_shared_data_index] < path) + { + const auto dynamic_path = src_dynamic_paths_for_shared_data[src_dynamic_paths_for_shared_data_index]; + serializePathAndValueIntoSharedData(shared_data_paths, shared_data_values, dynamic_path, *src_object_column.dynamic_paths.find(dynamic_path)->second, row); + ++src_dynamic_paths_for_shared_data_index; + } + + /// Insert path and value from src shared data to our shared data. + shared_data_paths->insertFrom(*src_shared_data_paths, i); + shared_data_values->insertFrom(*src_shared_data_values, i); + } } - auto & subcolumn = getSubcolumn(entry->path); - subcolumn.insertRangeFrom(entry->data, start, length); - } - - for (auto & entry : subcolumns) - { - if (!src_object.hasSubcolumn(entry->path)) + /// Insert remaining dynamic paths from src_dynamic_paths_for_shared_data. + for (; src_dynamic_paths_for_shared_data_index != src_dynamic_paths_for_shared_data.size(); ++src_dynamic_paths_for_shared_data_index) { - bool inserted = tryInsertManyDefaultsFromNested(entry); - if (!inserted) - entry->data.insertManyDefaults(length); + const auto dynamic_path = src_dynamic_paths_for_shared_data[src_dynamic_paths_for_shared_data_index]; + serializePathAndValueIntoSharedData(shared_data_paths, shared_data_values, dynamic_path, *src_object_column.dynamic_paths.find(dynamic_path)->second, row); + } + + shared_data_offsets.push_back(shared_data_paths->size()); + + /// Insert default value in all remaining dynamic paths. + for (auto & [_, column] : dynamic_paths_ptrs) + { + if (column->size() == current_size) + column->insertDefault(); } } - - num_rows += length; - finalize(); } -void ColumnObject::popBack(size_t length) +void ColumnObject::serializePathAndValueIntoSharedData(ColumnString * shared_data_paths, ColumnString * shared_data_values, std::string_view path, const IColumn & column, size_t n) { - for (auto & entry : subcolumns) - entry->data.popBack(length); + /// Don't store Null values in shared data. We consider Null value equivalent to the absence + /// of this path in the row because we cannot distinguish these 2 cases for dynamic paths. + if (column.isNullAt(n)) + return; - num_rows -= length; + shared_data_paths->insertData(path.data(), path.size()); + auto & shared_data_values_chars = shared_data_values->getChars(); + WriteBufferFromVector value_buf(shared_data_values_chars, AppendModeTag()); + getDynamicSerialization()->serializeBinary(column, n, value_buf, getFormatSettings()); + value_buf.finalize(); + shared_data_values_chars.push_back(0); + shared_data_values->getOffsets().push_back(shared_data_values_chars.size()); } -template -MutableColumnPtr ColumnObject::applyForSubcolumns(Func && func) const +void ColumnObject::deserializeValueFromSharedData(const ColumnString * shared_data_values, size_t n, IColumn & column) const { - if (!isFinalized()) + auto value_data = shared_data_values->getDataAt(n); + ReadBufferFromMemory buf(value_data.data, value_data.size); + getDynamicSerialization()->deserializeBinary(column, buf, getFormatSettings()); +} + +void ColumnObject::insertDefault() +{ + for (auto & [_, column] : typed_paths) + column->insertDefault(); + for (auto & [_, column] : dynamic_paths_ptrs) + column->insertDefault(); + shared_data->insertDefault(); +} + +void ColumnObject::insertManyDefaults(size_t length) +{ + for (auto & [_, column] : typed_paths) + column->insertManyDefaults(length); + for (auto & [_, column] : dynamic_paths_ptrs) + column->insertManyDefaults(length); + shared_data->insertManyDefaults(length); +} + +void ColumnObject::popBack(size_t n) +{ + for (auto & [_, column] : typed_paths) + column->popBack(n); + for (auto & [_, column] : dynamic_paths_ptrs) + column->popBack(n); + shared_data->popBack(n); +} + +StringRef ColumnObject::serializeValueIntoArena(size_t n, Arena & arena, const char *& begin) const +{ + StringRef res(begin, 0); + // Serialize all paths and values in binary format. + const auto & shared_data_offsets = getSharedDataOffsets(); + size_t offset = shared_data_offsets[static_cast(n) - 1]; + size_t end = shared_data_offsets[static_cast(n)]; + size_t num_paths = typed_paths.size() + dynamic_paths.size() + (end - offset); + char * pos = arena.allocContinue(sizeof(size_t), begin); + memcpy(pos, &num_paths, sizeof(size_t)); + res.data = pos - res.size; + res.size += sizeof(size_t); + /// Serialize paths and values from typed paths. + for (const auto & [path, column] : typed_paths) { - auto finalized = cloneFinalized(); - auto & finalized_object = assert_cast(*finalized); - return finalized_object.applyForSubcolumns(std::forward(func)); + size_t path_size = path.size(); + pos = arena.allocContinue(sizeof(size_t) + path_size, begin); + memcpy(pos, &path_size, sizeof(size_t)); + memcpy(pos + sizeof(size_t), path.data(), path_size); + auto data_ref = column->serializeValueIntoArena(n, arena, begin); + res.data = data_ref.data - res.size - sizeof(size_t) - path_size; + res.size += data_ref.size + sizeof(size_t) + path_size; } - auto res = ColumnObject::create(is_nullable); - for (const auto & subcolumn : subcolumns) + /// Serialize paths and values from dynamic paths. + for (const auto & [path, column] : dynamic_paths) { - auto new_subcolumn = func(subcolumn->data.getFinalizedColumn()); - res->addSubcolumn(subcolumn->path, new_subcolumn->assumeMutable()); + WriteBufferFromOwnString buf; + getDynamicSerialization()->serializeBinary(*column, n, buf, getFormatSettings()); + serializePathAndValueIntoArena(arena, begin, path, buf.str(), res); } + /// Serialize paths and values from shared data. + auto [shared_data_paths, shared_data_values] = getSharedDataPathsAndValues(); + for (size_t i = offset; i != end; ++i) + serializePathAndValueIntoArena(arena, begin, shared_data_paths->getDataAt(i), shared_data_values->getDataAt(i), res); + return res; } -ColumnPtr ColumnObject::permute(const Permutation & perm, size_t limit) const +void ColumnObject::serializePathAndValueIntoArena(DB::Arena & arena, const char *& begin, StringRef path, StringRef value, StringRef & res) const { - return applyForSubcolumns([&](const auto & subcolumn) { return subcolumn.permute(perm, limit); }); + size_t value_size = value.size; + size_t path_size = path.size; + char * pos = arena.allocContinue(sizeof(size_t) + path_size + sizeof(size_t) + value_size, begin); + memcpy(pos, &path_size, sizeof(size_t)); + memcpy(pos + sizeof(size_t), path.data, path_size); + memcpy(pos + sizeof(size_t) + path_size, &value_size, sizeof(size_t)); + memcpy(pos + sizeof(size_t) + path_size + sizeof(size_t), value.data, value_size); + res.data = pos - res.size; + res.size += sizeof(size_t) + path_size + sizeof(size_t) + value_size; } -ColumnPtr ColumnObject::filter(const Filter & filter, ssize_t result_size_hint) const +const char * ColumnObject::deserializeAndInsertFromArena(const char * pos) { - return applyForSubcolumns([&](const auto & subcolumn) { return subcolumn.filter(filter, result_size_hint); }); + size_t current_size = size(); + /// Deserialize paths and values and insert them into typed paths, dynamic paths or shared data. + /// Serialized paths could be unsorted, so we will have to sort all paths that will be inserted into shared data. + std::vector> paths_and_values_for_shared_data; + auto num_paths = unalignedLoad(pos); + pos += sizeof(size_t); + for (size_t i = 0; i != num_paths; ++i) + { + auto path_size = unalignedLoad(pos); + pos += sizeof(size_t); + std::string_view path(pos, path_size); + pos += path_size; + /// Check if it's a typed path. In this case we should use + /// deserializeAndInsertFromArena of corresponding column. + if (auto typed_it = typed_paths.find(path); typed_it != typed_paths.end()) + { + pos = typed_it->second->deserializeAndInsertFromArena(pos); + } + /// If it's not a typed path, deserialize binary value and try to insert it + /// to dynamic paths or shared data. + else + { + auto value_size = unalignedLoad(pos); + pos += sizeof(size_t); + std::string_view value(pos, value_size); + pos += value_size; + /// Check if we have this path in dynamic paths. + if (auto dynamic_it = dynamic_paths.find(path); dynamic_it != dynamic_paths.end()) + { + ReadBufferFromMemory buf(value.data(), value.size()); + getDynamicSerialization()->deserializeBinary(*dynamic_it->second, buf, getFormatSettings()); + } + /// Try to add a new dynamic path. + else if (auto * dynamic_path_column = tryToAddNewDynamicPath(path)) + { + ReadBufferFromMemory buf(value.data(), value.size()); + getDynamicSerialization()->deserializeBinary(*dynamic_path_column, buf, getFormatSettings()); + } + /// Limit on dynamic paths is reached, add this path to shared data later. + else + { + paths_and_values_for_shared_data.emplace_back(path, value); + } + } + } + + /// Sort and insert all paths from paths_and_values_for_shared_data into shared data. + std::sort(paths_and_values_for_shared_data.begin(), paths_and_values_for_shared_data.end()); + const auto [shared_data_paths, shared_data_values] = getSharedDataPathsAndValues(); + for (const auto & [path, value] : paths_and_values_for_shared_data) + { + shared_data_paths->insertData(path.data(), path.size()); + shared_data_values->insertData(value.data(), value.size()); + } + + getSharedDataOffsets().push_back(shared_data_paths->size()); + + /// Insert default value in all remaining typed and dynamic paths. + + for (auto & [_, column] : typed_paths) + { + if (column->size() == current_size) + column->insertDefault(); + } + + for (auto & [_, column] : dynamic_paths_ptrs) + { + if (column->size() == current_size) + column->insertDefault(); + } + + return pos; +} + +const char * ColumnObject::skipSerializedInArena(const char * pos) const +{ + auto num_paths = unalignedLoad(pos); + pos += sizeof(size_t); + for (size_t i = 0; i != num_paths; ++i) + { + auto path_size = unalignedLoad(pos); + pos += sizeof(size_t); + std::string_view path(pos, path_size); + pos += path_size; + if (auto typed_it = typed_paths.find(path); typed_it != typed_paths.end()) + { + pos = typed_it->second->skipSerializedInArena(pos); + } + else + { + auto value_size = unalignedLoad(pos); + pos += sizeof(size_t) + value_size; + } + } + + return pos; +} + +void ColumnObject::updateHashWithValue(size_t n, SipHash & hash) const +{ + for (const auto & [_, column] : typed_paths) + column->updateHashWithValue(n, hash); + for (const auto & [_, column] : dynamic_paths_ptrs) + column->updateHashWithValue(n, hash); + shared_data->updateHashWithValue(n, hash); +} + +WeakHash32 ColumnObject::getWeakHash32() const +{ + WeakHash32 hash(size()); + for (const auto & [_, column] : typed_paths) + hash.update(column->getWeakHash32()); + for (const auto & [_, column] : dynamic_paths_ptrs) + hash.update(column->getWeakHash32()); + hash.update(shared_data->getWeakHash32()); + return hash; +} + +void ColumnObject::updateHashFast(SipHash & hash) const +{ + for (const auto & [_, column] : typed_paths) + column->updateHashFast(hash); + for (const auto & [_, column] : dynamic_paths_ptrs) + column->updateHashFast(hash); + shared_data->updateHashFast(hash); +} + +ColumnPtr ColumnObject::filter(const Filter & filt, ssize_t result_size_hint) const +{ + std::unordered_map filtered_typed_paths; + filtered_typed_paths.reserve(typed_paths.size()); + for (const auto & [path, column] : typed_paths) + filtered_typed_paths[path] = column->filter(filt, result_size_hint); + + std::unordered_map filtered_dynamic_paths; + filtered_dynamic_paths.reserve(dynamic_paths_ptrs.size()); + for (const auto & [path, column] : dynamic_paths_ptrs) + filtered_dynamic_paths[path] = column->filter(filt, result_size_hint); + + auto filtered_shared_data = shared_data->filter(filt, result_size_hint); + return ColumnObject::create(filtered_typed_paths, filtered_dynamic_paths, filtered_shared_data, max_dynamic_paths, global_max_dynamic_paths, max_dynamic_types); +} + +void ColumnObject::expand(const Filter & mask, bool inverted) +{ + for (auto & [_, column] : typed_paths) + column->expand(mask, inverted); + for (auto & [_, column] : dynamic_paths_ptrs) + column->expand(mask, inverted); + shared_data->expand(mask, inverted); +} + +ColumnPtr ColumnObject::permute(const Permutation & perm, size_t limit) const +{ + std::unordered_map permuted_typed_paths; + permuted_typed_paths.reserve(typed_paths.size()); + for (const auto & [path, column] : typed_paths) + permuted_typed_paths[path] = column->permute(perm, limit); + + std::unordered_map permuted_dynamic_paths; + permuted_dynamic_paths.reserve(dynamic_paths_ptrs.size()); + for (const auto & [path, column] : dynamic_paths_ptrs) + permuted_dynamic_paths[path] = column->permute(perm, limit); + + auto permuted_shared_data = shared_data->permute(perm, limit); + return ColumnObject::create(permuted_typed_paths, permuted_dynamic_paths, permuted_shared_data, max_dynamic_paths, global_max_dynamic_paths, max_dynamic_types); } ColumnPtr ColumnObject::index(const IColumn & indexes, size_t limit) const { - return applyForSubcolumns([&](const auto & subcolumn) { return subcolumn.index(indexes, limit); }); + std::unordered_map indexed_typed_paths; + indexed_typed_paths.reserve(typed_paths.size()); + for (const auto & [path, column] : typed_paths) + indexed_typed_paths[path] = column->index(indexes, limit); + + std::unordered_map indexed_dynamic_paths; + indexed_dynamic_paths.reserve(dynamic_paths_ptrs.size()); + for (const auto & [path, column] : dynamic_paths_ptrs) + indexed_dynamic_paths[path] = column->index(indexes, limit); + + auto indexed_shared_data = shared_data->index(indexes, limit); + return ColumnObject::create(indexed_typed_paths, indexed_dynamic_paths, indexed_shared_data, max_dynamic_paths, global_max_dynamic_paths, max_dynamic_types); } -ColumnPtr ColumnObject::replicate(const Offsets & offsets) const +ColumnPtr ColumnObject::replicate(const Offsets & replicate_offsets) const { - return applyForSubcolumns([&](const auto & subcolumn) { return subcolumn.replicate(offsets); }); + std::unordered_map replicated_typed_paths; + replicated_typed_paths.reserve(typed_paths.size()); + for (const auto & [path, column] : typed_paths) + replicated_typed_paths[path] = column->replicate(replicate_offsets); + + std::unordered_map replicated_dynamic_paths; + replicated_dynamic_paths.reserve(dynamic_paths_ptrs.size()); + for (const auto & [path, column] : dynamic_paths_ptrs) + replicated_dynamic_paths[path] = column->replicate(replicate_offsets); + + auto replicated_shared_data = shared_data->replicate(replicate_offsets); + return ColumnObject::create(replicated_typed_paths, replicated_dynamic_paths, replicated_shared_data, max_dynamic_paths, global_max_dynamic_paths, max_dynamic_types); } -MutableColumnPtr ColumnObject::cloneResized(size_t new_size) const +MutableColumns ColumnObject::scatter(ColumnIndex num_columns, const Selector & selector) const { - if (new_size == 0) - return ColumnObject::create(is_nullable); + std::vector> scattered_typed_paths(num_columns); + for (auto & typed_paths_ : scattered_typed_paths) + typed_paths_.reserve(typed_paths.size()); - return applyForSubcolumns([&](const auto & subcolumn) { return subcolumn.cloneResized(new_size); }); + for (const auto & [path, column] : typed_paths) + { + auto scattered_columns = column->scatter(num_columns, selector); + for (size_t i = 0; i != num_columns; ++i) + scattered_typed_paths[i][path] = std::move(scattered_columns[i]); + } + + std::vector> scattered_dynamic_paths(num_columns); + for (auto & dynamic_paths_ : scattered_dynamic_paths) + dynamic_paths_.reserve(dynamic_paths_ptrs.size()); + + for (const auto & [path, column] : dynamic_paths_ptrs) + { + auto scattered_columns = column->scatter(num_columns, selector); + for (size_t i = 0; i != num_columns; ++i) + scattered_dynamic_paths[i][path] = std::move(scattered_columns[i]); + } + + auto scattered_shared_data_columns = shared_data->scatter(num_columns, selector); + MutableColumns result_columns; + result_columns.reserve(num_columns); + for (size_t i = 0; i != num_columns; ++i) + result_columns.emplace_back(ColumnObject::create(std::move(scattered_typed_paths[i]), std::move(scattered_dynamic_paths[i]), std::move(scattered_shared_data_columns[i]), max_dynamic_paths, global_max_dynamic_paths, max_dynamic_types)); + return result_columns; } void ColumnObject::getPermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation & res) const { - res.resize(num_rows); + /// Values in ColumnObject are not comparable. + res.resize(size()); iota(res.data(), res.size(), size_t(0)); } -void ColumnObject::getExtremes(Field & min, Field & max) const +void ColumnObject::reserve(size_t n) { - if (num_rows == 0) + for (auto & [_, column] : typed_paths) + column->reserve(n); + for (auto & [_, column] : dynamic_paths_ptrs) + column->reserve(n); + shared_data->reserve(n); +} + +size_t ColumnObject::capacity() const +{ + return shared_data->capacity(); +} + +void ColumnObject::ensureOwnership() +{ + for (auto & [_, column] : typed_paths) + column->ensureOwnership(); + for (auto & [_, column] : dynamic_paths_ptrs) + column->ensureOwnership(); + shared_data->ensureOwnership(); +} + +size_t ColumnObject::byteSize() const +{ + size_t size = 0; + for (const auto & [_, column] : typed_paths) + size += column->byteSize(); + for (const auto & [_, column] : dynamic_paths_ptrs) + size += column->byteSize(); + size += shared_data->byteSize(); + return size; +} + +size_t ColumnObject::byteSizeAt(size_t n) const +{ + size_t size = 0; + for (const auto & [_, column] : typed_paths) + size += column->byteSizeAt(n); + for (const auto & [_, column] : dynamic_paths_ptrs) + size += column->byteSizeAt(n); + size += shared_data->byteSizeAt(n); + return size; +} + +size_t ColumnObject::allocatedBytes() const +{ + size_t size = 0; + for (const auto & [_, column] : typed_paths) + size += column->allocatedBytes(); + for (const auto & [_, column] : dynamic_paths_ptrs) + size += column->allocatedBytes(); + size += shared_data->allocatedBytes(); + return size; +} + +void ColumnObject::protect() +{ + for (auto & [_, column] : typed_paths) + column->protect(); + for (auto & [_, column] : dynamic_paths_ptrs) + column->protect(); + shared_data->protect(); +} + +void ColumnObject::forEachSubcolumn(DB::IColumn::MutableColumnCallback callback) +{ + for (auto & [_, column] : typed_paths) + callback(column); + for (auto & [path, column] : dynamic_paths) + { + callback(column); + dynamic_paths_ptrs[path] = assert_cast(column.get()); + } + callback(shared_data); +} + +void ColumnObject::forEachSubcolumnRecursively(DB::IColumn::RecursiveMutableColumnCallback callback) +{ + for (auto & [_, column] : typed_paths) + { + callback(*column); + column->forEachSubcolumnRecursively(callback); + } + for (auto & [path, column] : dynamic_paths) + { + callback(*column); + column->forEachSubcolumnRecursively(callback); + dynamic_paths_ptrs[path] = assert_cast(column.get()); + } + callback(*shared_data); + shared_data->forEachSubcolumnRecursively(callback); +} + +bool ColumnObject::structureEquals(const IColumn & rhs) const +{ + /// 2 Object columns have equal structure if they have the same typed paths and global_max_dynamic_paths/max_dynamic_types. + const auto * rhs_object = typeid_cast(&rhs); + if (!rhs_object || typed_paths.size() != rhs_object->typed_paths.size() || global_max_dynamic_paths != rhs_object->global_max_dynamic_paths || max_dynamic_types != rhs_object->max_dynamic_types) + return false; + + for (const auto & [path, column] : typed_paths) + { + auto it = rhs_object->typed_paths.find(path); + if (it == rhs_object->typed_paths.end() || !it->second->structureEquals(*column)) + return false; + } + + return true; +} + +ColumnPtr ColumnObject::compress() const +{ + std::unordered_map compressed_typed_paths; + compressed_typed_paths.reserve(typed_paths.size()); + size_t byte_size = 0; + for (const auto & [path, column] : typed_paths) + { + auto compressed_column = column->compress(); + byte_size += compressed_column->byteSize(); + compressed_typed_paths[path] = std::move(compressed_column); + } + + std::unordered_map compressed_dynamic_paths; + compressed_dynamic_paths.reserve(dynamic_paths_ptrs.size()); + for (const auto & [path, column] : dynamic_paths_ptrs) + { + auto compressed_column = column->compress(); + byte_size += compressed_column->byteSize(); + compressed_dynamic_paths[path] = std::move(compressed_column); + } + + auto compressed_shared_data = shared_data->compress(); + byte_size += compressed_shared_data->byteSize(); + + auto decompress = + [my_compressed_typed_paths = std::move(compressed_typed_paths), + my_compressed_dynamic_paths = std::move(compressed_dynamic_paths), + my_compressed_shared_data = std::move(compressed_shared_data), + my_max_dynamic_paths = max_dynamic_paths, + my_global_max_dynamic_paths = global_max_dynamic_paths, + my_max_dynamic_types = max_dynamic_types, + my_statistics = statistics]() mutable + { + std::unordered_map decompressed_typed_paths; + decompressed_typed_paths.reserve(my_compressed_typed_paths.size()); + for (const auto & [path, column] : my_compressed_typed_paths) + decompressed_typed_paths[path] = column->decompress(); + + std::unordered_map decompressed_dynamic_paths; + decompressed_dynamic_paths.reserve(my_compressed_dynamic_paths.size()); + for (const auto & [path, column] : my_compressed_dynamic_paths) + decompressed_dynamic_paths[path] = column->decompress(); + + auto decompressed_shared_data = my_compressed_shared_data->decompress(); + return ColumnObject::create(decompressed_typed_paths, decompressed_dynamic_paths, decompressed_shared_data, my_max_dynamic_paths, my_global_max_dynamic_paths, my_max_dynamic_types, my_statistics); + }; + + return ColumnCompressed::create(size(), byte_size, decompress); +} + +void ColumnObject::finalize() +{ + for (auto & [_, column] : typed_paths) + column->finalize(); + for (auto & [_, column] : dynamic_paths_ptrs) + column->finalize(); + shared_data->finalize(); +} + +bool ColumnObject::isFinalized() const +{ + bool finalized = true; + for (const auto & [_, column] : typed_paths) + finalized &= column->isFinalized(); + for (const auto & [_, column] : dynamic_paths_ptrs) + finalized &= column->isFinalized(); + finalized &= shared_data->isFinalized(); + return finalized; +} + +void ColumnObject::getExtremes(DB::Field & min, DB::Field & max) const +{ + if (empty()) { min = Object(); max = Object(); @@ -884,227 +1144,311 @@ void ColumnObject::getExtremes(Field & min, Field & max) const } } -const ColumnObject::Subcolumn & ColumnObject::getSubcolumn(const PathInData & key) const +void ColumnObject::prepareForSquashing(const std::vector & source_columns) { - if (const auto * node = subcolumns.findLeaf(key)) - return node->data; + if (source_columns.empty()) + return; - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in ColumnObject", key.getPath()); -} + /// Dynamic paths of source Object columns may differ. + /// We want to preallocate memory for all dynamic paths we will have after squashing. + /// It may happen that the total number of dynamic paths in source columns will + /// exceed the limit, in this case we will choose the most frequent paths. + std::unordered_map path_to_total_number_of_non_null_values; -ColumnObject::Subcolumn & ColumnObject::getSubcolumn(const PathInData & key) -{ - if (const auto * node = subcolumns.findLeaf(key)) - return const_cast(node)->data; - - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in ColumnObject", key.getPath()); -} - -bool ColumnObject::hasSubcolumn(const PathInData & key) const -{ - return subcolumns.findLeaf(key) != nullptr; -} - -void ColumnObject::addSubcolumn(const PathInData & key, MutableColumnPtr && subcolumn) -{ - size_t new_size = subcolumn->size(); - bool inserted = subcolumns.add(key, Subcolumn(std::move(subcolumn), is_nullable)); - - if (!inserted) - throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Subcolumn '{}' already exists", key.getPath()); - - if (num_rows == 0) - num_rows = new_size; - else if (new_size != num_rows) - throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, - "Size of subcolumn {} ({}) is inconsistent with column size ({})", - key.getPath(), new_size, num_rows); -} - -void ColumnObject::addSubcolumn(const PathInData & key, size_t new_size) -{ - bool inserted = subcolumns.add(key, Subcolumn(new_size, is_nullable)); - if (!inserted) - throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Subcolumn '{}' already exists", key.getPath()); - - if (num_rows == 0) - num_rows = new_size; - else if (new_size != num_rows) - throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, - "Required size of subcolumn {} ({}) is inconsistent with column size ({})", - key.getPath(), new_size, num_rows); -} - -void ColumnObject::addNestedSubcolumn(const PathInData & key, const FieldInfo & field_info, size_t new_size) -{ - if (!key.hasNested()) - throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, - "Cannot add Nested subcolumn, because path doesn't contain Nested"); - - bool inserted = false; - /// We find node that represents the same Nested type as @key. - const auto * nested_node = subcolumns.findBestMatch(key); - - if (nested_node) + auto add_dynamic_paths = [&](const ColumnObject & source_object) { - /// Find any leaf of Nested subcolumn. - const auto * leaf = Subcolumns::findLeaf(nested_node, [&](const auto &) { return true; }); - assert(leaf); + for (const auto & [path, dynamic_column_ptr] : source_object.dynamic_paths_ptrs) + { + auto it = path_to_total_number_of_non_null_values.find(path); + if (it == path_to_total_number_of_non_null_values.end()) + it = path_to_total_number_of_non_null_values.emplace(path, 0).first; + it->second += (dynamic_column_ptr->size() - dynamic_column_ptr->getNumberOfDefaultRows()); + } + }; - /// Recreate subcolumn with default values and the same sizes of arrays. - auto new_subcolumn = leaf->data.recreateWithDefaultValues(field_info); + for (const auto & source_column : source_columns) + add_dynamic_paths(assert_cast(*source_column)); - /// It's possible that we have already inserted value from current row - /// to this subcolumn. So, adjust size to expected. - if (new_subcolumn.size() > new_size) - new_subcolumn.popBack(new_subcolumn.size() - new_size); + /// Add dynamic paths from this object column. + add_dynamic_paths(*this); - assert(new_subcolumn.size() == new_size); - inserted = subcolumns.add(key, new_subcolumn); + /// Check if the number of all dynamic paths exceeds the limit. + if (path_to_total_number_of_non_null_values.size() > max_dynamic_paths) + { + /// We want to keep the most frequent paths in the resulting object column. + /// Sort paths by total number of non null values. + /// Don't include paths from current column as we cannot change them. + std::vector> paths_with_sizes; + paths_with_sizes.reserve(path_to_total_number_of_non_null_values.size() - dynamic_paths.size()); + for (const auto & [path, size] : path_to_total_number_of_non_null_values) + { + if (!dynamic_paths.contains(path)) + paths_with_sizes.emplace_back(size, path); + } + std::sort(paths_with_sizes.begin(), paths_with_sizes.end(), std::greater()); + + /// Fill dynamic_paths with first paths in sorted list until we reach the limit. + size_t paths_to_add = max_dynamic_paths - dynamic_paths.size(); + for (size_t i = 0; i != paths_to_add; ++i) + addNewDynamicPath(paths_with_sizes[i].second); } + /// Otherwise keep all paths. else { - /// If node was not found just add subcolumn with empty arrays. - inserted = subcolumns.add(key, Subcolumn(new_size, is_nullable)); + /// Create columns for new dynamic paths. + for (const auto & [path, _] : path_to_total_number_of_non_null_values) + { + if (!dynamic_paths.contains(path)) + addNewDynamicPath(path); + } } - if (!inserted) - throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Subcolumn '{}' already exists", key.getPath()); + /// Now current object column has all resulting dynamic paths and we can call + /// prepareForSquashing on them to preallocate the memory. + /// Also we can preallocate memory for dynamic paths and shared data. + Columns shared_data_source_columns; + shared_data_source_columns.reserve(source_columns.size()); + std::unordered_map typed_paths_source_columns; + typed_paths_source_columns.reserve(typed_paths.size()); + std::unordered_map dynamic_paths_source_columns; + dynamic_paths_source_columns.reserve(dynamic_paths.size()); - if (num_rows == 0) - num_rows = new_size; - else if (new_size != num_rows) - throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, - "Required size of subcolumn {} ({}) is inconsistent with column size ({})", - key.getPath(), new_size, num_rows); + for (const auto & [path, column] : typed_paths) + typed_paths_source_columns[path].reserve(source_columns.size()); + + for (const auto & [path, column] : dynamic_paths) + dynamic_paths_source_columns[path].reserve(source_columns.size()); + + size_t total_size = 0; + for (const auto & source_column : source_columns) + { + const auto & source_object_column = assert_cast(*source_column); + total_size += source_object_column.size(); + shared_data_source_columns.push_back(source_object_column.shared_data); + + for (const auto & [path, column] : source_object_column.typed_paths) + typed_paths_source_columns.at(path).push_back(column); + + for (const auto & [path, column] : source_object_column.dynamic_paths) + { + if (dynamic_paths.contains(path)) + dynamic_paths_source_columns.at(path).push_back(column); + } + } + + shared_data->prepareForSquashing(shared_data_source_columns); + + for (const auto & [path, source_typed_columns] : typed_paths_source_columns) + typed_paths[path]->prepareForSquashing(source_typed_columns); + + for (const auto & [path, source_dynamic_columns] : dynamic_paths_source_columns) + { + /// ColumnDynamic::prepareForSquashing may not preallocate enough memory for discriminators and offsets + /// because source columns may not have this dynamic path (and so dynamic columns filled with nulls). + /// For this reason we first call ColumnDynamic::reserve with resulting size to preallocate memory for + /// discriminators and offsets and ColumnDynamic::prepareVariantsForSquashing to preallocate memory + /// for all variants inside Dynamic. + dynamic_paths_ptrs[path]->reserve(total_size); + dynamic_paths_ptrs[path]->prepareVariantsForSquashing(source_dynamic_columns); + } } -const ColumnObject::Subcolumns::Node * ColumnObject::getLeafOfTheSameNested(const Subcolumns::NodePtr & entry) const +void ColumnObject::takeDynamicStructureFromSourceColumns(const DB::Columns & source_columns) { - if (!entry->path.hasNested()) - return nullptr; + if (!empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "takeDynamicStructureFromSourceColumns should be called only on empty Object column"); - size_t old_size = entry->data.size(); - const auto * current_node = subcolumns.findLeaf(entry->path); - const Subcolumns::Node * leaf = nullptr; + /// During serialization of Object column in MergeTree all Object columns + /// in single part must have the same structure (the same dynamic paths). During merge + /// resulting column is constructed by inserting from source columns, + /// but it may happen that resulting column doesn't have rows from all source parts + /// but only from subset of them, and as a result some dynamic paths could be missing + /// and structures of resulting column may differ. + /// To solve this problem, before merge we create empty resulting column and use this method + /// to take dynamic structure from all source columns even if we won't insert + /// rows from some of them. - while (current_node) + /// We want to construct resulting set of dynamic paths with paths that have least number of null values in source columns + /// and insert the rest paths into shared data if we exceed the limit of dynamic paths. + /// First, collect all dynamic paths from all source columns and calculate total number of non-null values. + std::unordered_map path_to_total_number_of_non_null_values; + for (const auto & source_column : source_columns) { - /// Try to find the first Nested up to the current node. - const auto * node_nested = Subcolumns::findParent(current_node, - [](const auto & candidate) { return candidate.isNested(); }); - - if (!node_nested) - break; - - /// Find the leaf with subcolumn that contains values - /// for the last rows. - /// If there are no leaves, skip current node and find - /// the next node up to the current. - leaf = Subcolumns::findLeaf(node_nested, - [&](const auto & candidate) + const auto & source_object = assert_cast(*source_column); + /// During deserialization from MergeTree we will have statistics from the whole + /// data part with number of non null values for each dynamic path. + const auto & source_statistics = source_object.getStatistics(); + for (const auto & [path, column_ptr] : source_object.dynamic_paths_ptrs) + { + auto it = path_to_total_number_of_non_null_values.find(path); + if (it == path_to_total_number_of_non_null_values.end()) + it = path_to_total_number_of_non_null_values.emplace(path, 0).first; + size_t size = column_ptr->size() - column_ptr->getNumberOfDefaultRows(); + if (source_statistics) { - return candidate.data.size() > old_size; - }); + auto statistics_it = source_statistics->dynamic_paths_statistics.find(path); + if (statistics_it != source_statistics->dynamic_paths_statistics.end()) + size = statistics_it->second; + } + it->second += size; + } - if (leaf) - break; - - current_node = node_nested->parent; + /// Add paths from shared data statistics. It can helo extracting frequent paths + /// from shared data to dynamic paths. + if (source_statistics) + { + for (const auto & [path, size] : source_statistics->shared_data_paths_statistics) + { + auto it = path_to_total_number_of_non_null_values.find(path); + if (it == path_to_total_number_of_non_null_values.end()) + it = path_to_total_number_of_non_null_values.emplace(path, 0).first; + it->second += size; + } + } } - if (leaf && isNothing(leaf->data.getLeastCommonTypeBase())) - return nullptr; + /// Reset current state. + dynamic_paths.clear(); + dynamic_paths_ptrs.clear(); + max_dynamic_paths = global_max_dynamic_paths; + Statistics new_statistics(Statistics::Source::MERGE); - return leaf; -} - -bool ColumnObject::tryInsertManyDefaultsFromNested(const Subcolumns::NodePtr & entry) const -{ - const auto * leaf = getLeafOfTheSameNested(entry); - if (!leaf) - return false; - - size_t old_size = entry->data.size(); - auto field_info = entry->data.getFieldInfo(); - - /// Cut the needed range from the found leaf - /// and replace scalar values to the correct - /// default values for given entry. - auto new_subcolumn = leaf->data - .cut(old_size, leaf->data.size() - old_size) - .recreateWithDefaultValues(field_info); - - entry->data.insertRangeFrom(new_subcolumn, 0, new_subcolumn.size()); - return true; -} - -bool ColumnObject::tryInsertDefaultFromNested(const Subcolumns::NodePtr & entry) const -{ - const auto * leaf = getLeafOfTheSameNested(entry); - if (!leaf) - return false; - - auto last_field = leaf->data.getLastField(); - if (last_field.isNull()) - return false; - - size_t leaf_num_dimensions = leaf->data.getNumberOfDimensions(); - size_t entry_num_dimensions = entry->data.getNumberOfDimensions(); - - auto default_scalar = entry_num_dimensions > leaf_num_dimensions - ? createEmptyArrayField(entry_num_dimensions - leaf_num_dimensions) - : entry->data.getLeastCommonTypeBase()->getDefault(); - - auto default_field = applyVisitor(FieldVisitorReplaceScalars(default_scalar, leaf_num_dimensions), last_field); - entry->data.insert(std::move(default_field)); - return true; -} - -PathsInData ColumnObject::getKeys() const -{ - PathsInData keys; - keys.reserve(subcolumns.size()); - for (const auto & entry : subcolumns) - keys.emplace_back(entry->path); - return keys; -} - -bool ColumnObject::isFinalized() const -{ - return std::all_of(subcolumns.begin(), subcolumns.end(), - [](const auto & entry) { return entry->data.isFinalized(); }); -} - -void ColumnObject::finalize() -{ - size_t old_size = size(); - Subcolumns new_subcolumns; - for (auto && entry : subcolumns) + /// Check if the number of all dynamic paths exceeds the limit. + if (path_to_total_number_of_non_null_values.size() > max_dynamic_paths) { - const auto & least_common_type = entry->data.getLeastCommonType(); + /// Sort paths by total number of non null values. + std::vector> paths_with_sizes; + paths_with_sizes.reserve(path_to_total_number_of_non_null_values.size()); + for (const auto & [path, size] : path_to_total_number_of_non_null_values) + paths_with_sizes.emplace_back(size, path); + std::sort(paths_with_sizes.begin(), paths_with_sizes.end(), std::greater()); - /// Do not add subcolumns, which consist only from NULLs. - if (isNothing(getBaseTypeOfArray(least_common_type))) - continue; - - entry->data.finalize(); - new_subcolumns.add(entry->path, entry->data); + /// Fill dynamic_paths with first max_dynamic_paths paths in sorted list. + for (const auto & [size, path] : paths_with_sizes) + { + if (dynamic_paths.size() < max_dynamic_paths) + { + dynamic_paths.emplace(path, ColumnDynamic::create(max_dynamic_types)); + dynamic_paths_ptrs.emplace(path, assert_cast(dynamic_paths.find(path)->second.get())); + } + /// Add all remaining paths into shared data statistics until we reach its max size; + else if (new_statistics.shared_data_paths_statistics.size() < Statistics::MAX_SHARED_DATA_STATISTICS_SIZE) + { + new_statistics.shared_data_paths_statistics.emplace(path, size); + } + } + } + /// Use all dynamic paths from all source columns. + else + { + for (const auto & [path, _] : path_to_total_number_of_non_null_values) + { + dynamic_paths[path] = ColumnDynamic::create(max_dynamic_types); + dynamic_paths_ptrs[path] = assert_cast(dynamic_paths[path].get()); + } } - /// If all subcolumns were skipped add a dummy subcolumn, - /// because Tuple type must have at least one element. - if (new_subcolumns.empty()) - new_subcolumns.add(PathInData{COLUMN_NAME_DUMMY}, Subcolumn{ColumnUInt8::create(old_size, 0), is_nullable}); + /// Fill statistics for the merged part. + for (const auto & [path, _] : dynamic_paths) + new_statistics.dynamic_paths_statistics[path] = path_to_total_number_of_non_null_values[path]; + statistics = std::make_shared(std::move(new_statistics)); - std::swap(subcolumns, new_subcolumns); - checkObjectHasNoAmbiguosPaths(getKeys()); + /// Set max_dynamic_paths to the number of selected dynamic paths. + /// It's needed to avoid adding new unexpected dynamic paths during inserts into this column during merge. + max_dynamic_paths = dynamic_paths.size(); + + /// Now we have the resulting set of dynamic paths that will be used in all merged columns. + /// As we use Dynamic column for dynamic paths, we should call takeDynamicStructureFromSourceColumns + /// on all resulting dynamic columns. + for (auto & [path, column] : dynamic_paths) + { + Columns dynamic_path_source_columns; + for (const auto & source_column : source_columns) + { + const auto & source_object = assert_cast(*source_column); + auto it = source_object.dynamic_paths.find(path); + if (it != source_object.dynamic_paths.end()) + dynamic_path_source_columns.push_back(it->second); + } + column->takeDynamicStructureFromSourceColumns(dynamic_path_source_columns); + } + + /// Typed paths also can contain types with dynamic structure. + for (auto & [path, column] : typed_paths) + { + Columns typed_path_source_columns; + typed_path_source_columns.reserve(source_columns.size()); + for (const auto & source_column : source_columns) + typed_path_source_columns.push_back(assert_cast(*source_column).typed_paths.at(path)); + column->takeDynamicStructureFromSourceColumns(typed_path_source_columns); + } } -void ColumnObject::updateHashFast(SipHash & hash) const +size_t ColumnObject::findPathLowerBoundInSharedData(StringRef path, const ColumnString & shared_data_paths, size_t start, size_t end) { - for (const auto & entry : subcolumns) - for (auto & part : entry->data.data) - part->updateHashFast(hash); + /// Simple random access iterator over values in ColumnString in specified range. + class Iterator + { + public: + using difference_type = size_t; + using value_type = StringRef; + using iterator_category = std::random_access_iterator_tag; + using pointer = StringRef*; + using reference = StringRef&; + + Iterator() = delete; + Iterator(const ColumnString * data_, size_t index_) : data(data_), index(index_) {} + Iterator(const Iterator & rhs) = default; + Iterator & operator=(const Iterator & rhs) = default; + inline Iterator& operator+=(difference_type rhs) { index += rhs; return *this;} + inline StringRef operator*() const {return data->getDataAt(index);} + + inline Iterator& operator++() { ++index; return *this; } + inline difference_type operator-(const Iterator & rhs) const {return index - rhs.index; } + + const ColumnString * data; + size_t index; + }; + + Iterator start_it(&shared_data_paths, start); + Iterator end_it(&shared_data_paths, end); + auto it = std::lower_bound(start_it, end_it, path); + return it.index; } + +void ColumnObject::fillPathColumnFromSharedData(IColumn & path_column, StringRef path, const ColumnPtr & shared_data_column, size_t start, size_t end) +{ + const auto & shared_data_array = assert_cast(*shared_data_column); + const auto & shared_data_offsets = shared_data_array.getOffsets(); + size_t first_offset = shared_data_offsets[static_cast(start) - 1]; + size_t last_offset = shared_data_offsets[static_cast(end) - 1]; + /// Check if we have at least one row with data. + if (first_offset == last_offset) + { + path_column.insertManyDefaults(end - start); + return; + } + + const auto & shared_data_tuple = assert_cast(shared_data_array.getData()); + const auto & shared_data_paths = assert_cast(shared_data_tuple.getColumn(0)); + const auto & shared_data_values = assert_cast(shared_data_tuple.getColumn(1)); + const auto & dynamic_serialization = getDynamicSerialization(); + for (size_t i = start; i != end; ++i) + { + size_t paths_start = shared_data_offsets[static_cast(i) - 1]; + size_t paths_end = shared_data_offsets[static_cast(i)]; + auto lower_bound_path_index = ColumnObject::findPathLowerBoundInSharedData(path, shared_data_paths, paths_start, paths_end); + if (lower_bound_path_index != paths_end && shared_data_paths.getDataAt(lower_bound_path_index) == path) + { + auto value_data = shared_data_values.getDataAt(lower_bound_path_index); + ReadBufferFromMemory buf(value_data.data, value_data.size); + dynamic_serialization->deserializeBinary(path_column, buf, getFormatSettings()); + } + else + { + path_column.insertDefault(); + } + } +} + } diff --git a/src/Columns/ColumnObject.h b/src/Columns/ColumnObject.h index 25cfaa550f6..f530ed29ef3 100644 --- a/src/Columns/ColumnObject.h +++ b/src/Columns/ColumnObject.h @@ -1,216 +1,117 @@ #pragma once #include -#include -#include -#include -#include -#include +#include +#include +#include +#include #include +#include +#include +#include +#include namespace DB { -namespace ErrorCodes -{ - extern const int NOT_IMPLEMENTED; -} - -/// Info that represents a scalar or array field in a decomposed view. -/// It allows to recreate field with different number -/// of dimensions or nullability. -struct FieldInfo -{ - /// The common type of of all scalars in field. - DataTypePtr scalar_type; - - /// Do we have NULL scalar in field. - bool have_nulls; - - /// If true then we have scalars with different types in array and - /// we need to convert scalars to the common type. - bool need_convert; - - /// Number of dimension in array. 0 if field is scalar. - size_t num_dimensions; - - /// If true then this field is an array of variadic dimension field - /// and we need to normalize the dimension - bool need_fold_dimension; -}; - -FieldInfo getFieldInfo(const Field & field); - -/** A column that represents object with dynamic set of subcolumns. - * Subcolumns are identified by paths in document and are stored in - * a trie-like structure. ColumnObject is not suitable for writing into tables - * and it should be converted to Tuple with fixed set of subcolumns before that. - */ class ColumnObject final : public COWHelper, ColumnObject> { public: - /** Class that represents one subcolumn. - * It stores values in several parts of column - * and keeps current common type of all parts. - * We add a new column part with a new type, when we insert a field, - * which can't be converted to the current common type. - * After insertion of all values subcolumn should be finalized - * for writing and other operations. - */ - class Subcolumn + struct Statistics { - public: - Subcolumn() = default; - Subcolumn(size_t size_, bool is_nullable_); - Subcolumn(MutableColumnPtr && data_, bool is_nullable_); - - size_t size() const; - size_t byteSize() const; - size_t allocatedBytes() const; - void get(size_t n, Field & res) const; - - bool isFinalized() const; - const DataTypePtr & getLeastCommonType() const { return least_common_type.get(); } - const DataTypePtr & getLeastCommonTypeBase() const { return least_common_type.getBase(); } - size_t getNumberOfDimensions() const { return least_common_type.getNumberOfDimensions(); } - - /// Checks the consistency of column's parts stored in @data. - void checkTypes() const; - - /// Inserts a field, which scalars can be arbitrary, but number of - /// dimensions should be consistent with current common type. - void insert(Field field); - void insert(Field field, FieldInfo info); - - void insertDefault(); - void insertManyDefaults(size_t length); - void insertRangeFrom(const Subcolumn & src, size_t start, size_t length); - void popBack(size_t n); - - Subcolumn cut(size_t start, size_t length) const; - - /// Converts all column's parts to the common type and - /// creates a single column that stores all values. - void finalize(); - - /// Returns last inserted field. - Field getLastField() const; - - FieldInfo getFieldInfo() const; - - /// Recreates subcolumn with default scalar values and keeps sizes of arrays. - /// Used to create columns of type Nested with consistent array sizes. - Subcolumn recreateWithDefaultValues(const FieldInfo & field_info) const; - - /// Returns single column if subcolumn in finalizes. - /// Otherwise -- undefined behaviour. - IColumn & getFinalizedColumn(); - const IColumn & getFinalizedColumn() const; - const ColumnPtr & getFinalizedColumnPtr() const; - - const std::vector & getData() const { return data; } - size_t getNumberOfDefaultsInPrefix() const { return num_of_defaults_in_prefix; } - - friend class ColumnObject; - - private: - class LeastCommonType + enum class Source { - public: - LeastCommonType(); - explicit LeastCommonType(DataTypePtr type_); - - const DataTypePtr & get() const { return type; } - const DataTypePtr & getBase() const { return base_type; } - size_t getNumberOfDimensions() const { return num_dimensions; } - - private: - DataTypePtr type; - DataTypePtr base_type; - size_t num_dimensions = 0; + READ, /// Statistics were loaded into column during reading from MergeTree. + MERGE, /// Statistics were calculated during merge of several MergeTree parts. }; - void addNewColumnPart(DataTypePtr type); + explicit Statistics(Source source_) : source(source_) {} - /// Current least common type of all values inserted to this subcolumn. - LeastCommonType least_common_type; - - /// If true then common type type of subcolumn is Nullable - /// and default values are NULLs. - bool is_nullable = false; - - /// Parts of column. Parts should be in increasing order in terms of subtypes/supertypes. - /// That means that the least common type for i-th prefix is the type of i-th part - /// and it's the supertype for all type of column from 0 to i-1. - std::vector data; - - /// Until we insert any non-default field we don't know further - /// least common type and we count number of defaults in prefix, - /// which will be converted to the default type of final common type. - size_t num_of_defaults_in_prefix = 0; - - size_t num_rows = 0; + /// Source of the statistics. + Source source; + /// Statistics for dynamic paths: (path) -> (total number of not-null values). + std::unordered_map dynamic_paths_statistics; + /// Statistics for paths in shared data: path) -> (total number of not-null values). + /// We don't store statistics for all paths in shared data but only for some subset of them + /// (is 10000 a good limit? It should not be expensive to store 10000 paths per part) + static const size_t MAX_SHARED_DATA_STATISTICS_SIZE = 10000; + std::unordered_map shared_data_paths_statistics; }; - using Subcolumns = SubcolumnsTree; + using StatisticsPtr = std::shared_ptr; private: - /// If true then all subcolumns are nullable. - const bool is_nullable; + friend class COWHelper, ColumnObject>; - Subcolumns subcolumns; - size_t num_rows; + ColumnObject(std::unordered_map typed_paths_, size_t max_dynamic_paths_, size_t max_dynamic_types_); + ColumnObject( + std::unordered_map typed_paths_, + std::unordered_map dynamic_paths_, + MutableColumnPtr shared_data_, + size_t max_dynamic_paths_, + size_t global_max_dynamic_paths_, + size_t max_dynamic_types_, + const StatisticsPtr & statistics_ = {}); + /// Use StringHashForHeterogeneousLookup hash for hash maps to be able to use std::string_view in find() method. + using PathToColumnMap = std::unordered_map; + using PathToDynamicColumnPtrMap = std::unordered_map; public: - static constexpr auto COLUMN_NAME_DUMMY = "_dummy"; + /** Create immutable column using immutable arguments. This arguments may be shared with other columns. + * Use mutate in order to make mutable column and mutate shared nested columns. + */ + using Base = COWHelper, ColumnObject>; - explicit ColumnObject(bool is_nullable_); - ColumnObject(Subcolumns && subcolumns_, bool is_nullable_); + static Ptr create( + const std::unordered_map & typed_paths_, + const std::unordered_map & dynamic_paths_, + const ColumnPtr & shared_data_, + size_t max_dynamic_paths_, + size_t global_max_dynamic_paths_, + size_t max_dynamic_types_, + const StatisticsPtr & statistics_ = {}); - /// Checks that all subcolumns have consistent sizes. - void checkConsistency() const; + static MutablePtr create( + std::unordered_map typed_paths_, + std::unordered_map dynamic_paths_, + MutableColumnPtr shared_data_, + size_t max_dynamic_paths_, + size_t global_max_dynamic_paths_, + size_t max_dynamic_types_, + const StatisticsPtr & statistics_ = {}); - bool hasSubcolumn(const PathInData & key) const; + static MutablePtr create(std::unordered_map typed_paths_, size_t max_dynamic_paths_, size_t max_dynamic_types_); - const Subcolumn & getSubcolumn(const PathInData & key) const; - Subcolumn & getSubcolumn(const PathInData & key); + std::string getName() const override; - void incrementNumRows() { ++num_rows; } + const char * getFamilyName() const override + { + return "Object"; + } - /// Adds a subcolumn from existing IColumn. - void addSubcolumn(const PathInData & key, MutableColumnPtr && subcolumn); + TypeIndex getDataType() const override + { + return TypeIndex::Object; + } - /// Adds a subcolumn of specific size with default values. - void addSubcolumn(const PathInData & key, size_t new_size); + MutableColumnPtr cloneEmpty() const override; + MutableColumnPtr cloneResized(size_t size) const override; - /// Adds a subcolumn of type Nested of specific size with default values. - /// It cares about consistency of sizes of Nested arrays. - void addNestedSubcolumn(const PathInData & key, const FieldInfo & field_info, size_t new_size); + size_t size() const override + { + return shared_data->size(); + } - /// Finds a subcolumn from the same Nested type as @entry and inserts - /// an array with default values with consistent sizes as in Nested type. - bool tryInsertDefaultFromNested(const Subcolumns::NodePtr & entry) const; - bool tryInsertManyDefaultsFromNested(const Subcolumns::NodePtr & entry) const; + Field operator[](size_t n) const override; + void get(size_t n, Field & res) const override; - const Subcolumns & getSubcolumns() const { return subcolumns; } - Subcolumns & getSubcolumns() { return subcolumns; } - PathsInData getKeys() const; - - /// Part of interface - - const char * getFamilyName() const override { return "Object"; } - TypeIndex getDataType() const override { return TypeIndex::Object; } - - size_t size() const override; - size_t byteSize() const override; - size_t allocatedBytes() const override; - void forEachSubcolumn(MutableColumnCallback callback) override; - void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override; - void insert(const Field & field) override; - bool tryInsert(const Field & field) override; - void insertDefault() override; + bool isDefaultAt(size_t n) const override; + StringRef getDataAt(size_t n) const override; + void insertData(const char * pos, size_t length) override; + void insert(const Field & x) override; + bool tryInsert(const Field & x) override; #if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src, size_t n) override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; @@ -218,24 +119,31 @@ public: void doInsertFrom(const IColumn & src, size_t n) override; void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; #endif + /// TODO: implement more optimal insertManyFrom + void insertDefault() override; + void insertManyDefaults(size_t length) override; - void popBack(size_t length) override; - Field operator[](size_t n) const override; - void get(size_t n, Field & res) const override; + void popBack(size_t n) override; + StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override; + const char * deserializeAndInsertFromArena(const char * pos) override; + const char * skipSerializedInArena(const char * pos) const override; + + void updateHashWithValue(size_t n, SipHash & hash) const override; + WeakHash32 getWeakHash32() const override; + void updateHashFast(SipHash & hash) const override; + + ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; + void expand(const Filter & mask, bool inverted) override; ColumnPtr permute(const Permutation & perm, size_t limit) const override; - ColumnPtr filter(const Filter & filter, ssize_t result_size_hint) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override; - ColumnPtr replicate(const Offsets & offsets) const override; - MutableColumnPtr cloneResized(size_t new_size) const override; + ColumnPtr replicate(const Offsets & replicate_offsets) const override; + MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; - /// Finalizes all subcolumns. - void finalize() override; - bool isFinalized() const override; - - /// Order of rows in ColumnObject is undefined. - void getPermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation & res) const override; + void getPermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation &) const override; void updatePermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation &, EqualRanges &) const override {} + + /// Values of ColumnObject are not comparable. #if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t, size_t, const IColumn &, int) const override { return 0; } #else @@ -243,35 +151,118 @@ public: #endif void getExtremes(Field & min, Field & max) const override; - /// All other methods throw exception. + void reserve(size_t n) override; + size_t capacity() const override; + void prepareForSquashing(const std::vector & source_columns) override; + void ensureOwnership() override; + size_t byteSize() const override; + size_t byteSizeAt(size_t n) const override; + size_t allocatedBytes() const override; + void protect() override; - StringRef getDataAt(size_t) const override { throwMustBeConcrete(); } - bool isDefaultAt(size_t) const override { throwMustBeConcrete(); } - void insertData(const char *, size_t) override { throwMustBeConcrete(); } - StringRef serializeValueIntoArena(size_t, Arena &, char const *&) const override { throwMustBeConcrete(); } - char * serializeValueIntoMemory(size_t, char *) const override { throwMustBeConcrete(); } - const char * deserializeAndInsertFromArena(const char *) override { throwMustBeConcrete(); } - const char * skipSerializedInArena(const char *) const override { throwMustBeConcrete(); } - void updateHashWithValue(size_t, SipHash &) const override { throwMustBeConcrete(); } - WeakHash32 getWeakHash32() const override { throwMustBeConcrete(); } - void updateHashFast(SipHash & hash) const override; - void expand(const Filter &, bool) override { throwMustBeConcrete(); } - bool hasEqualValues() const override { throwMustBeConcrete(); } - size_t byteSizeAt(size_t) const override { throwMustBeConcrete(); } - double getRatioOfDefaultRows(double) const override { throwMustBeConcrete(); } - UInt64 getNumberOfDefaultRows() const override { throwMustBeConcrete(); } - void getIndicesOfNonDefaultRows(Offsets &, size_t, size_t) const override { throwMustBeConcrete(); } + void forEachSubcolumn(MutableColumnCallback callback) override; -private: - [[noreturn]] static void throwMustBeConcrete() + void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override; + + bool structureEquals(const IColumn & rhs) const override; + + ColumnPtr compress() const override; + + void finalize() override; + bool isFinalized() const override; + + bool hasDynamicStructure() const override { return true; } + void takeDynamicStructureFromSourceColumns(const Columns & source_columns) override; + + const PathToColumnMap & getTypedPaths() const { return typed_paths; } + PathToColumnMap & getTypedPaths() { return typed_paths; } + + const PathToColumnMap & getDynamicPaths() const { return dynamic_paths; } + PathToColumnMap & getDynamicPaths() { return dynamic_paths; } + + const PathToDynamicColumnPtrMap & getDynamicPathsPtrs() const { return dynamic_paths_ptrs; } + PathToDynamicColumnPtrMap & getDynamicPathsPtrs() { return dynamic_paths_ptrs; } + + const StatisticsPtr & getStatistics() const { return statistics; } + + const ColumnPtr & getSharedDataPtr() const { return shared_data; } + ColumnPtr & getSharedDataPtr() { return shared_data; } + IColumn & getSharedDataColumn() { return *shared_data; } + + const ColumnArray & getSharedDataNestedColumn() const { return assert_cast(*shared_data); } + ColumnArray & getSharedDataNestedColumn() { return assert_cast(*shared_data); } + + ColumnArray::Offsets & getSharedDataOffsets() { return assert_cast(*shared_data).getOffsets(); } + const ColumnArray::Offsets & getSharedDataOffsets() const { return assert_cast(*shared_data).getOffsets(); } + + std::pair getSharedDataPathsAndValues() { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "ColumnObject must be converted to ColumnTuple before use"); + auto & column_array = assert_cast(*shared_data); + auto & column_tuple = assert_cast(column_array.getData()); + return {assert_cast(&column_tuple.getColumn(0)), assert_cast(&column_tuple.getColumn(1))}; } - template - MutableColumnPtr applyForSubcolumns(Func && func) const; + std::pair getSharedDataPathsAndValues() const + { + const auto & column_array = assert_cast(*shared_data); + const auto & column_tuple = assert_cast(column_array.getData()); + return {assert_cast(&column_tuple.getColumn(0)), assert_cast(&column_tuple.getColumn(1))}; + } - /// It's used to get shared sized of Nested to insert correct default values. - const Subcolumns::Node * getLeafOfTheSameNested(const Subcolumns::NodePtr & entry) const; + size_t getMaxDynamicTypes() const { return max_dynamic_types; } + size_t getMaxDynamicPaths() const { return max_dynamic_paths; } + size_t getGlobalMaxDynamicPaths() const { return global_max_dynamic_paths; } + + /// Try to add new dynamic path. Returns pointer to the new dynamic + /// path column or nullptr if limit on dynamic paths is reached. + ColumnDynamic * tryToAddNewDynamicPath(std::string_view path); + /// Throws an exception if cannot add. + void addNewDynamicPath(std::string_view path); + + void setDynamicPaths(const std::vector & paths); + void setMaxDynamicPaths(size_t max_dynamic_paths_); + void setStatistics(const StatisticsPtr & statistics_) { statistics = statistics_; } + + void serializePathAndValueIntoSharedData(ColumnString * shared_data_paths, ColumnString * shared_data_values, std::string_view path, const IColumn & column, size_t n); + void deserializeValueFromSharedData(const ColumnString * shared_data_values, size_t n, IColumn & column) const; + + /// Paths in shared data are sorted in each row. Use this method to find the lower bound for specific path in the row. + static size_t findPathLowerBoundInSharedData(StringRef path, const ColumnString & shared_data_paths, size_t start, size_t end); + /// Insert all the data from shared data with specified path to dynamic column. + static void fillPathColumnFromSharedData(IColumn & path_column, StringRef path, const ColumnPtr & shared_data_column, size_t start, size_t end); + +private: + void insertFromSharedDataAndFillRemainingDynamicPaths(const ColumnObject & src_object_column, std::vector && src_dynamic_paths_for_shared_data, size_t start, size_t length); + void serializePathAndValueIntoArena(Arena & arena, const char *& begin, StringRef path, StringRef value, StringRef & res) const; + + /// Map path -> column for paths with explicitly specified types. + /// This set of paths is constant and cannot be changed. + PathToColumnMap typed_paths; + /// Map path -> column for dynamically added paths. All columns + /// here are Dynamic columns. This set of paths can be extended + /// during inerts into the column. + PathToColumnMap dynamic_paths; + /// Store and use pointers to ColumnDynamic to avoid virtual calls. + /// With hundreds of dynamic paths these virtual calls are noticeable. + PathToDynamicColumnPtrMap dynamic_paths_ptrs; + /// Shared storage for all other paths and values. It's filled + /// when the number of dynamic paths reaches the limit. + /// It has type Array(Tuple(String, String)) and stores + /// an array of pairs (path, binary serialized dynamic value) for each row. + WrappedPtr shared_data; + + /// Maximum number of dynamic paths. If this limit is reached, all new paths will be inserted into shared data. + /// This limit can be different for different instances of Object column. For example, we can decrease it + /// in takeDynamicStructureFromSourceColumns before merge. + size_t max_dynamic_paths; + /// Global limit on number of dynamic paths for all column instances of this Object type. It's the limit specified + /// in the type definition (for example 'JSON(max_dynamic_paths=N)'). max_dynamic_paths is always not greater than this limit. + size_t global_max_dynamic_paths; + /// Maximum number of dynamic types for each dynamic path. Used while creating Dynamic columns for new dynamic paths. + size_t max_dynamic_types; + /// Statistics on the number of non-null values for each dynamic path and for some shared data paths in the MergeTree data part. + /// Calculated during serializing of data part in MergeTree. Used to determine the set of dynamic paths for the merged part. + StatisticsPtr statistics; }; + } diff --git a/src/Columns/ColumnObjectDeprecated.cpp b/src/Columns/ColumnObjectDeprecated.cpp new file mode 100644 index 00000000000..d03b1d0df82 --- /dev/null +++ b/src/Columns/ColumnObjectDeprecated.cpp @@ -0,0 +1,1111 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ARGUMENT_OUT_OF_BOUND; + extern const int DUPLICATE_COLUMN; + extern const int EXPERIMENTAL_FEATURE_ERROR; + extern const int ILLEGAL_COLUMN; + extern const int NUMBER_OF_DIMENSIONS_MISMATCHED; + extern const int SIZES_OF_COLUMNS_DOESNT_MATCH; +} + +namespace +{ + +/// Recreates column with default scalar values and keeps sizes of arrays. +ColumnPtr recreateColumnWithDefaultValues( + const ColumnPtr & column, const DataTypePtr & scalar_type, size_t num_dimensions) +{ + const auto * column_array = checkAndGetColumn(column.get()); + if (column_array && num_dimensions) + { + return ColumnArray::create( + recreateColumnWithDefaultValues( + column_array->getDataPtr(), scalar_type, num_dimensions - 1), + IColumn::mutate(column_array->getOffsetsPtr())); + } + + return createArrayOfType(scalar_type, num_dimensions)->createColumn()->cloneResized(column->size()); +} + +/// Replaces NULL fields to given field or empty array. +class FieldVisitorReplaceNull : public StaticVisitor +{ +public: + explicit FieldVisitorReplaceNull( + const Field & replacement_, size_t num_dimensions_) + : replacement(replacement_) + , num_dimensions(num_dimensions_) + { + } + + Field operator()(const Null &) const + { + return num_dimensions ? Array() : replacement; + } + + Field operator()(const Array & x) const + { + assert(num_dimensions > 0); + const size_t size = x.size(); + Array res(size); + for (size_t i = 0; i < size; ++i) + res[i] = applyVisitor(FieldVisitorReplaceNull(replacement, num_dimensions - 1), x[i]); + return res; + } + + template + Field operator()(const T & x) const { return x; } + +private: + const Field & replacement; + size_t num_dimensions; +}; + +/// Visitor that allows to get type of scalar field +/// or least common type of scalars in array. +/// More optimized version of FieldToDataType. +class FieldVisitorToScalarType : public StaticVisitor<> +{ +public: + using FieldType = Field::Types::Which; + + void operator()(const Array & x) + { + size_t size = x.size(); + for (size_t i = 0; i < size; ++i) + applyVisitor(*this, x[i]); + } + + void operator()(const UInt64 & x) + { + field_types.insert(FieldType::UInt64); + if (x <= std::numeric_limits::max()) + type_indexes.insert(TypeIndex::UInt8); + else if (x <= std::numeric_limits::max()) + type_indexes.insert(TypeIndex::UInt16); + else if (x <= std::numeric_limits::max()) + type_indexes.insert(TypeIndex::UInt32); + else + type_indexes.insert(TypeIndex::UInt64); + } + + void operator()(const Int64 & x) + { + field_types.insert(FieldType::Int64); + if (x <= std::numeric_limits::max() && x >= std::numeric_limits::min()) + type_indexes.insert(TypeIndex::Int8); + else if (x <= std::numeric_limits::max() && x >= std::numeric_limits::min()) + type_indexes.insert(TypeIndex::Int16); + else if (x <= std::numeric_limits::max() && x >= std::numeric_limits::min()) + type_indexes.insert(TypeIndex::Int32); + else + type_indexes.insert(TypeIndex::Int64); + } + + void operator()(const bool &) + { + field_types.insert(FieldType::UInt64); + type_indexes.insert(TypeIndex::UInt8); + } + + void operator()(const Null &) + { + have_nulls = true; + } + + template + void operator()(const T &) + { + field_types.insert(Field::TypeToEnum>::value); + type_indexes.insert(TypeToTypeIndex>); + } + + DataTypePtr getScalarType() const { return getLeastSupertypeOrString(type_indexes); } + bool haveNulls() const { return have_nulls; } + bool needConvertField() const { return field_types.size() > 1; } + +private: + TypeIndexSet type_indexes; + std::unordered_set field_types; + bool have_nulls = false; +}; + +} + +FieldInfo getFieldInfo(const Field & field) +{ + FieldVisitorToScalarType to_scalar_type_visitor; + applyVisitor(to_scalar_type_visitor, field); + FieldVisitorToNumberOfDimensions to_number_dimension_visitor; + + return + { + to_scalar_type_visitor.getScalarType(), + to_scalar_type_visitor.haveNulls(), + to_scalar_type_visitor.needConvertField(), + applyVisitor(to_number_dimension_visitor, field), + to_number_dimension_visitor.need_fold_dimension + }; +} + +ColumnObjectDeprecated::Subcolumn::Subcolumn(MutableColumnPtr && data_, bool is_nullable_) + : least_common_type(getDataTypeByColumn(*data_)) + , is_nullable(is_nullable_) + , num_rows(data_->size()) +{ + data.push_back(std::move(data_)); +} + +ColumnObjectDeprecated::Subcolumn::Subcolumn( + size_t size_, bool is_nullable_) + : least_common_type(std::make_shared()) + , is_nullable(is_nullable_) + , num_of_defaults_in_prefix(size_) + , num_rows(size_) +{ +} + +size_t ColumnObjectDeprecated::Subcolumn::size() const +{ + return num_rows; +} + +size_t ColumnObjectDeprecated::Subcolumn::byteSize() const +{ + size_t res = 0; + for (const auto & part : data) + res += part->byteSize(); + return res; +} + +size_t ColumnObjectDeprecated::Subcolumn::allocatedBytes() const +{ + size_t res = 0; + for (const auto & part : data) + res += part->allocatedBytes(); + return res; +} + +void ColumnObjectDeprecated::Subcolumn::get(size_t n, Field & res) const +{ + if (isFinalized()) + { + getFinalizedColumn().get(n, res); + return; + } + + size_t ind = n; + if (ind < num_of_defaults_in_prefix) + { + res = least_common_type.get()->getDefault(); + return; + } + + ind -= num_of_defaults_in_prefix; + for (const auto & part : data) + { + if (ind < part->size()) + { + part->get(ind, res); + res = convertFieldToTypeOrThrow(res, *least_common_type.get()); + return; + } + + ind -= part->size(); + } + + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Index ({}) for getting field is out of range", n); +} + +void ColumnObjectDeprecated::Subcolumn::checkTypes() const +{ + DataTypes prefix_types; + prefix_types.reserve(data.size()); + for (size_t i = 0; i < data.size(); ++i) + { + auto current_type = getDataTypeByColumn(*data[i]); + prefix_types.push_back(current_type); + auto prefix_common_type = getLeastSupertype(prefix_types); + if (!prefix_common_type->equals(*current_type)) + throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, + "Data type {} of column at position {} cannot represent all columns from i-th prefix", + current_type->getName(), i); + } +} + +void ColumnObjectDeprecated::Subcolumn::insert(Field field) +{ + auto info = DB::getFieldInfo(field); + insert(std::move(field), std::move(info)); +} + +void ColumnObjectDeprecated::Subcolumn::addNewColumnPart(DataTypePtr type) +{ + auto serialization = type->getSerialization(ISerialization::Kind::SPARSE); + data.push_back(type->createColumn(*serialization)); + least_common_type = LeastCommonType{std::move(type)}; +} + +static bool isConversionRequiredBetweenIntegers(const IDataType & lhs, const IDataType & rhs) +{ + /// If both of types are signed/unsigned integers and size of left field type + /// is less than right type, we don't need to convert field, + /// because all integer fields are stored in Int64/UInt64. + + WhichDataType which_lhs(lhs); + WhichDataType which_rhs(rhs); + + bool is_native_int = which_lhs.isNativeInt() && which_rhs.isNativeInt(); + bool is_native_uint = which_lhs.isNativeUInt() && which_rhs.isNativeUInt(); + + return (!is_native_int && !is_native_uint) + || lhs.getSizeOfValueInMemory() > rhs.getSizeOfValueInMemory(); +} + +void ColumnObjectDeprecated::Subcolumn::insert(Field field, FieldInfo info) +{ + auto base_type = std::move(info.scalar_type); + + if (isNothing(base_type) && info.num_dimensions == 0) + { + insertDefault(); + return; + } + + auto column_dim = least_common_type.getNumberOfDimensions(); + auto value_dim = info.num_dimensions; + + if (isNothing(least_common_type.get())) + column_dim = value_dim; + + if (isNothing(base_type)) + value_dim = column_dim; + + if (value_dim != column_dim) + throw Exception(ErrorCodes::NUMBER_OF_DIMENSIONS_MISMATCHED, + "Dimension of types mismatched between inserted value and column. " + "Dimension of value: {}. Dimension of column: {}", + value_dim, column_dim); + + if (is_nullable) + base_type = makeNullable(base_type); + + if (!is_nullable && info.have_nulls) + field = applyVisitor(FieldVisitorReplaceNull(base_type->getDefault(), value_dim), std::move(field)); + + bool type_changed = false; + const auto & least_common_base_type = least_common_type.getBase(); + + if (data.empty()) + { + addNewColumnPart(createArrayOfType(std::move(base_type), value_dim)); + } + else if (!least_common_base_type->equals(*base_type) && !isNothing(base_type)) + { + if (isConversionRequiredBetweenIntegers(*base_type, *least_common_base_type)) + { + base_type = getLeastSupertypeOrString(DataTypes{std::move(base_type), least_common_base_type}); + type_changed = true; + if (!least_common_base_type->equals(*base_type)) + addNewColumnPart(createArrayOfType(std::move(base_type), value_dim)); + } + } + + if (type_changed || info.need_convert) + field = convertFieldToTypeOrThrow(field, *least_common_type.get()); + + if (!data.back()->tryInsert(field)) + { + /** Normalization of the field above is pretty complicated (it uses several FieldVisitors), + * so in the case of a bug, we may get mismatched types. + * The `IColumn::insert` method does not check the type of the inserted field, and it can lead to a segmentation fault. + * Therefore, we use the safer `tryInsert` method to get an exception instead of a segmentation fault. + */ + throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, + "Cannot insert field {} to column {}", + field.dump(), data.back()->dumpStructure()); + } + + ++num_rows; +} + +void ColumnObjectDeprecated::Subcolumn::insertRangeFrom(const Subcolumn & src, size_t start, size_t length) +{ + assert(start + length <= src.size()); + size_t end = start + length; + num_rows += length; + + if (data.empty()) + { + addNewColumnPart(src.getLeastCommonType()); + } + else if (!least_common_type.get()->equals(*src.getLeastCommonType())) + { + auto new_least_common_type = getLeastSupertypeOrString(DataTypes{least_common_type.get(), src.getLeastCommonType()}); + if (!new_least_common_type->equals(*least_common_type.get())) + addNewColumnPart(std::move(new_least_common_type)); + } + + if (end <= src.num_of_defaults_in_prefix) + { + data.back()->insertManyDefaults(length); + return; + } + + if (start < src.num_of_defaults_in_prefix) + data.back()->insertManyDefaults(src.num_of_defaults_in_prefix - start); + + auto insert_from_part = [&](const auto & column, size_t from, size_t n) + { + assert(from + n <= column->size()); + auto column_type = getDataTypeByColumn(*column); + + if (column_type->equals(*least_common_type.get())) + { + data.back()->insertRangeFrom(*column, from, n); + return; + } + + /// If we need to insert large range, there is no sense to cut part of column and cast it. + /// Casting of all column and inserting from it can be faster. + /// Threshold is just a guess. + + if (n * 3 >= column->size()) + { + auto casted_column = castColumn({column, column_type, ""}, least_common_type.get()); + data.back()->insertRangeFrom(*casted_column, from, n); + return; + } + + auto casted_column = column->cut(from, n); + casted_column = castColumn({casted_column, column_type, ""}, least_common_type.get()); + data.back()->insertRangeFrom(*casted_column, 0, n); + }; + + size_t pos = 0; + size_t processed_rows = src.num_of_defaults_in_prefix; + + /// Find the first part of the column that intersects the range. + while (pos < src.data.size() && processed_rows + src.data[pos]->size() < start) + { + processed_rows += src.data[pos]->size(); + ++pos; + } + + /// Insert from the first part of column. + if (pos < src.data.size() && processed_rows < start) + { + size_t part_start = start - processed_rows; + size_t part_length = std::min(src.data[pos]->size() - part_start, end - start); + insert_from_part(src.data[pos], part_start, part_length); + processed_rows += src.data[pos]->size(); + ++pos; + } + + /// Insert from the parts of column in the middle of range. + while (pos < src.data.size() && processed_rows + src.data[pos]->size() < end) + { + insert_from_part(src.data[pos], 0, src.data[pos]->size()); + processed_rows += src.data[pos]->size(); + ++pos; + } + + /// Insert from the last part of column if needed. + if (pos < src.data.size() && processed_rows < end) + { + size_t part_end = end - processed_rows; + insert_from_part(src.data[pos], 0, part_end); + } +} + +bool ColumnObjectDeprecated::Subcolumn::isFinalized() const +{ + return num_of_defaults_in_prefix == 0 && + (data.empty() || (data.size() == 1 && !data[0]->isSparse())); +} + +void ColumnObjectDeprecated::Subcolumn::finalize() +{ + if (isFinalized()) + return; + + if (data.size() == 1 && num_of_defaults_in_prefix == 0) + { + data[0] = data[0]->convertToFullColumnIfSparse(); + return; + } + + const auto & to_type = least_common_type.get(); + auto result_column = to_type->createColumn(); + + if (num_of_defaults_in_prefix) + result_column->insertManyDefaults(num_of_defaults_in_prefix); + + for (auto & part : data) + { + part = part->convertToFullColumnIfSparse(); + auto from_type = getDataTypeByColumn(*part); + size_t part_size = part->size(); + + if (!from_type->equals(*to_type)) + { + auto offsets = ColumnUInt64::create(); + auto & offsets_data = offsets->getData(); + + /// We need to convert only non-default values and then recreate column + /// with default value of new type, because default values (which represents misses in data) + /// may be inconsistent between types (e.g "0" in UInt64 and empty string in String). + + part->getIndicesOfNonDefaultRows(offsets_data, 0, part_size); + + if (offsets->size() == part_size) + { + part = castColumn({part, from_type, ""}, to_type); + } + else + { + auto values = part->index(*offsets, offsets->size()); + values = castColumn({values, from_type, ""}, to_type); + part = values->createWithOffsets(offsets_data, *createColumnConstWithDefaultValue(result_column->getPtr()), part_size, /*shift=*/ 0); + } + } + + result_column->insertRangeFrom(*part, 0, part_size); + } + + data = { std::move(result_column) }; + num_of_defaults_in_prefix = 0; +} + +void ColumnObjectDeprecated::Subcolumn::insertDefault() +{ + if (data.empty()) + ++num_of_defaults_in_prefix; + else + data.back()->insertDefault(); + + ++num_rows; +} + +void ColumnObjectDeprecated::Subcolumn::insertManyDefaults(size_t length) +{ + if (data.empty()) + num_of_defaults_in_prefix += length; + else + data.back()->insertManyDefaults(length); + + num_rows += length; +} + +void ColumnObjectDeprecated::Subcolumn::popBack(size_t n) +{ + assert(n <= size()); + + num_rows -= n; + size_t num_removed = 0; + for (auto it = data.rbegin(); it != data.rend(); ++it) + { + if (n == 0) + break; + + auto & column = *it; + if (n < column->size()) + { + column->popBack(n); + n = 0; + } + else + { + ++num_removed; + n -= column->size(); + } + } + + data.resize(data.size() - num_removed); + num_of_defaults_in_prefix -= n; +} + +ColumnObjectDeprecated::Subcolumn ColumnObjectDeprecated::Subcolumn::cut(size_t start, size_t length) const +{ + Subcolumn new_subcolumn(0, is_nullable); + new_subcolumn.insertRangeFrom(*this, start, length); + return new_subcolumn; +} + +Field ColumnObjectDeprecated::Subcolumn::getLastField() const +{ + if (data.empty()) + return Field(); + + const auto & last_part = data.back(); + assert(!last_part->empty()); + return (*last_part)[last_part->size() - 1]; +} + +FieldInfo ColumnObjectDeprecated::Subcolumn::getFieldInfo() const +{ + const auto & base_type = least_common_type.getBase(); + return FieldInfo + { + .scalar_type = base_type, + .have_nulls = base_type->isNullable(), + .need_convert = false, + .num_dimensions = least_common_type.getNumberOfDimensions(), + .need_fold_dimension = false, + }; +} + +ColumnObjectDeprecated::Subcolumn ColumnObjectDeprecated::Subcolumn::recreateWithDefaultValues(const FieldInfo & field_info) const +{ + auto scalar_type = field_info.scalar_type; + if (is_nullable) + scalar_type = makeNullable(scalar_type); + + Subcolumn new_subcolumn(*this); + new_subcolumn.least_common_type = LeastCommonType{createArrayOfType(scalar_type, field_info.num_dimensions)}; + + for (auto & part : new_subcolumn.data) + part = recreateColumnWithDefaultValues(part, scalar_type, field_info.num_dimensions); + + return new_subcolumn; +} + +IColumn & ColumnObjectDeprecated::Subcolumn::getFinalizedColumn() +{ + assert(isFinalized()); + return *data[0]; +} + +const IColumn & ColumnObjectDeprecated::Subcolumn::getFinalizedColumn() const +{ + assert(isFinalized()); + return *data[0]; +} + +const ColumnPtr & ColumnObjectDeprecated::Subcolumn::getFinalizedColumnPtr() const +{ + assert(isFinalized()); + return data[0]; +} + +ColumnObjectDeprecated::Subcolumn::LeastCommonType::LeastCommonType() + : type(std::make_shared()) + , base_type(type) + , num_dimensions(0) +{ +} + +ColumnObjectDeprecated::Subcolumn::LeastCommonType::LeastCommonType(DataTypePtr type_) + : type(std::move(type_)) + , base_type(getBaseTypeOfArray(type)) + , num_dimensions(DB::getNumberOfDimensions(*type)) +{ +} + +ColumnObjectDeprecated::ColumnObjectDeprecated(bool is_nullable_) + : is_nullable(is_nullable_) + , num_rows(0) +{ +} + +ColumnObjectDeprecated::ColumnObjectDeprecated(Subcolumns && subcolumns_, bool is_nullable_) + : is_nullable(is_nullable_) + , subcolumns(std::move(subcolumns_)) + , num_rows(subcolumns.empty() ? 0 : (*subcolumns.begin())->data.size()) + +{ + checkConsistency(); +} + +void ColumnObjectDeprecated::checkConsistency() const +{ + if (subcolumns.empty()) + return; + + for (const auto & leaf : subcolumns) + { + if (num_rows != leaf->data.size()) + { + throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, "Sizes of subcolumns are inconsistent in ColumnObjectDeprecated." + " Subcolumn '{}' has {} rows, but expected size is {}", + leaf->path.getPath(), leaf->data.size(), num_rows); + } + } +} + +size_t ColumnObjectDeprecated::size() const +{ +#ifndef NDEBUG + checkConsistency(); +#endif + return num_rows; +} + +size_t ColumnObjectDeprecated::byteSize() const +{ + size_t res = 0; + for (const auto & entry : subcolumns) + res += entry->data.byteSize(); + return res; +} + +size_t ColumnObjectDeprecated::allocatedBytes() const +{ + size_t res = 0; + for (const auto & entry : subcolumns) + res += entry->data.allocatedBytes(); + return res; +} + +void ColumnObjectDeprecated::forEachSubcolumn(MutableColumnCallback callback) +{ + for (auto & entry : subcolumns) + for (auto & part : entry->data.data) + callback(part); +} + +void ColumnObjectDeprecated::forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) +{ + for (auto & entry : subcolumns) + { + for (auto & part : entry->data.data) + { + callback(*part); + part->forEachSubcolumnRecursively(callback); + } + } +} + +void ColumnObjectDeprecated::insert(const Field & field) +{ + const auto & object = field.safeGet(); + + HashSet inserted_paths; + size_t old_size = size(); + for (const auto & [key_str, value] : object) + { + PathInData key(key_str); + inserted_paths.insert(key_str); + if (!hasSubcolumn(key)) + addSubcolumn(key, old_size); + + auto & subcolumn = getSubcolumn(key); + subcolumn.insert(value); + } + + for (auto & entry : subcolumns) + { + if (!inserted_paths.has(entry->path.getPath())) + { + bool inserted = tryInsertDefaultFromNested(entry); + if (!inserted) + entry->data.insertDefault(); + } + } + + ++num_rows; +} + +bool ColumnObjectDeprecated::tryInsert(const Field & field) +{ + if (field.getType() != Field::Types::Which::Object) + return false; + + insert(field); + return true; +} + +void ColumnObjectDeprecated::insertDefault() +{ + for (auto & entry : subcolumns) + entry->data.insertDefault(); + + ++num_rows; +} + +Field ColumnObjectDeprecated::operator[](size_t n) const +{ + Field object; + get(n, object); + return object; +} + +void ColumnObjectDeprecated::get(size_t n, Field & res) const +{ + assert(n < size()); + res = Object(); + auto & object = res.safeGet(); + + for (const auto & entry : subcolumns) + { + auto it = object.try_emplace(entry->path.getPath()).first; + entry->data.get(n, it->second); + } +} + +#if !defined(DEBUG_OR_SANITIZER_BUILD) +void ColumnObjectDeprecated::insertFrom(const IColumn & src, size_t n) +#else +void ColumnObjectDeprecated::doInsertFrom(const IColumn & src, size_t n) +#endif +{ + insert(src[n]); +} + +#if !defined(DEBUG_OR_SANITIZER_BUILD) +void ColumnObjectDeprecated::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else +void ColumnObjectDeprecated::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif +{ + const auto & src_object = assert_cast(src); + + for (const auto & entry : src_object.subcolumns) + { + if (!hasSubcolumn(entry->path)) + { + if (entry->path.hasNested()) + addNestedSubcolumn(entry->path, entry->data.getFieldInfo(), num_rows); + else + addSubcolumn(entry->path, num_rows); + } + + auto & subcolumn = getSubcolumn(entry->path); + subcolumn.insertRangeFrom(entry->data, start, length); + } + + for (auto & entry : subcolumns) + { + if (!src_object.hasSubcolumn(entry->path)) + { + bool inserted = tryInsertManyDefaultsFromNested(entry); + if (!inserted) + entry->data.insertManyDefaults(length); + } + } + + num_rows += length; + finalize(); +} + +void ColumnObjectDeprecated::popBack(size_t length) +{ + for (auto & entry : subcolumns) + entry->data.popBack(length); + + num_rows -= length; +} + +template +MutableColumnPtr ColumnObjectDeprecated::applyForSubcolumns(Func && func) const +{ + if (!isFinalized()) + { + auto finalized = cloneFinalized(); + auto & finalized_object = assert_cast(*finalized); + return finalized_object.applyForSubcolumns(std::forward(func)); + } + + auto res = ColumnObjectDeprecated::create(is_nullable); + for (const auto & subcolumn : subcolumns) + { + auto new_subcolumn = func(subcolumn->data.getFinalizedColumn()); + res->addSubcolumn(subcolumn->path, new_subcolumn->assumeMutable()); + } + + return res; +} + +ColumnPtr ColumnObjectDeprecated::permute(const Permutation & perm, size_t limit) const +{ + return applyForSubcolumns([&](const auto & subcolumn) { return subcolumn.permute(perm, limit); }); +} + +ColumnPtr ColumnObjectDeprecated::filter(const Filter & filter, ssize_t result_size_hint) const +{ + return applyForSubcolumns([&](const auto & subcolumn) { return subcolumn.filter(filter, result_size_hint); }); +} + +ColumnPtr ColumnObjectDeprecated::index(const IColumn & indexes, size_t limit) const +{ + return applyForSubcolumns([&](const auto & subcolumn) { return subcolumn.index(indexes, limit); }); +} + +ColumnPtr ColumnObjectDeprecated::replicate(const Offsets & offsets) const +{ + return applyForSubcolumns([&](const auto & subcolumn) { return subcolumn.replicate(offsets); }); +} + +MutableColumnPtr ColumnObjectDeprecated::cloneResized(size_t new_size) const +{ + if (new_size == 0) + return ColumnObjectDeprecated::create(is_nullable); + + return applyForSubcolumns([&](const auto & subcolumn) { return subcolumn.cloneResized(new_size); }); +} + +void ColumnObjectDeprecated::getPermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation & res) const +{ + res.resize(num_rows); + iota(res.data(), res.size(), size_t(0)); +} + +void ColumnObjectDeprecated::getExtremes(Field & min, Field & max) const +{ + if (num_rows == 0) + { + min = Object(); + max = Object(); + } + else + { + get(0, min); + get(0, max); + } +} + +const ColumnObjectDeprecated::Subcolumn & ColumnObjectDeprecated::getSubcolumn(const PathInData & key) const +{ + if (const auto * node = subcolumns.findLeaf(key)) + return node->data; + + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in ColumnObjectDeprecated", key.getPath()); +} + +ColumnObjectDeprecated::Subcolumn & ColumnObjectDeprecated::getSubcolumn(const PathInData & key) +{ + if (const auto * node = subcolumns.findLeaf(key)) + return const_cast(node)->data; + + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in ColumnObjectDeprecated", key.getPath()); +} + +bool ColumnObjectDeprecated::hasSubcolumn(const PathInData & key) const +{ + return subcolumns.findLeaf(key) != nullptr; +} + +void ColumnObjectDeprecated::addSubcolumn(const PathInData & key, MutableColumnPtr && subcolumn) +{ + size_t new_size = subcolumn->size(); + bool inserted = subcolumns.add(key, Subcolumn(std::move(subcolumn), is_nullable)); + + if (!inserted) + throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Subcolumn '{}' already exists", key.getPath()); + + if (num_rows == 0) + num_rows = new_size; + else if (new_size != num_rows) + throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, + "Size of subcolumn {} ({}) is inconsistent with column size ({})", + key.getPath(), new_size, num_rows); +} + +void ColumnObjectDeprecated::addSubcolumn(const PathInData & key, size_t new_size) +{ + bool inserted = subcolumns.add(key, Subcolumn(new_size, is_nullable)); + if (!inserted) + throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Subcolumn '{}' already exists", key.getPath()); + + if (num_rows == 0) + num_rows = new_size; + else if (new_size != num_rows) + throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, + "Required size of subcolumn {} ({}) is inconsistent with column size ({})", + key.getPath(), new_size, num_rows); +} + +void ColumnObjectDeprecated::addNestedSubcolumn(const PathInData & key, const FieldInfo & field_info, size_t new_size) +{ + if (!key.hasNested()) + throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, + "Cannot add Nested subcolumn, because path doesn't contain Nested"); + + bool inserted = false; + /// We find node that represents the same Nested type as @key. + const auto * nested_node = subcolumns.findBestMatch(key); + + if (nested_node) + { + /// Find any leaf of Nested subcolumn. + const auto * leaf = Subcolumns::findLeaf(nested_node, [&](const auto &) { return true; }); + assert(leaf); + + /// Recreate subcolumn with default values and the same sizes of arrays. + auto new_subcolumn = leaf->data.recreateWithDefaultValues(field_info); + + /// It's possible that we have already inserted value from current row + /// to this subcolumn. So, adjust size to expected. + if (new_subcolumn.size() > new_size) + new_subcolumn.popBack(new_subcolumn.size() - new_size); + + assert(new_subcolumn.size() == new_size); + inserted = subcolumns.add(key, new_subcolumn); + } + else + { + /// If node was not found just add subcolumn with empty arrays. + inserted = subcolumns.add(key, Subcolumn(new_size, is_nullable)); + } + + if (!inserted) + throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Subcolumn '{}' already exists", key.getPath()); + + if (num_rows == 0) + num_rows = new_size; + else if (new_size != num_rows) + throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, + "Required size of subcolumn {} ({}) is inconsistent with column size ({})", + key.getPath(), new_size, num_rows); +} + +const ColumnObjectDeprecated::Subcolumns::Node * ColumnObjectDeprecated::getLeafOfTheSameNested(const Subcolumns::NodePtr & entry) const +{ + if (!entry->path.hasNested()) + return nullptr; + + size_t old_size = entry->data.size(); + const auto * current_node = subcolumns.findLeaf(entry->path); + const Subcolumns::Node * leaf = nullptr; + + while (current_node) + { + /// Try to find the first Nested up to the current node. + const auto * node_nested = Subcolumns::findParent(current_node, + [](const auto & candidate) { return candidate.isNested(); }); + + if (!node_nested) + break; + + /// Find the leaf with subcolumn that contains values + /// for the last rows. + /// If there are no leaves, skip current node and find + /// the next node up to the current. + leaf = Subcolumns::findLeaf(node_nested, + [&](const auto & candidate) + { + return candidate.data.size() > old_size; + }); + + if (leaf) + break; + + current_node = node_nested->parent; + } + + if (leaf && isNothing(leaf->data.getLeastCommonTypeBase())) + return nullptr; + + return leaf; +} + +bool ColumnObjectDeprecated::tryInsertManyDefaultsFromNested(const Subcolumns::NodePtr & entry) const +{ + const auto * leaf = getLeafOfTheSameNested(entry); + if (!leaf) + return false; + + size_t old_size = entry->data.size(); + auto field_info = entry->data.getFieldInfo(); + + /// Cut the needed range from the found leaf + /// and replace scalar values to the correct + /// default values for given entry. + auto new_subcolumn = leaf->data + .cut(old_size, leaf->data.size() - old_size) + .recreateWithDefaultValues(field_info); + + entry->data.insertRangeFrom(new_subcolumn, 0, new_subcolumn.size()); + return true; +} + +bool ColumnObjectDeprecated::tryInsertDefaultFromNested(const Subcolumns::NodePtr & entry) const +{ + const auto * leaf = getLeafOfTheSameNested(entry); + if (!leaf) + return false; + + auto last_field = leaf->data.getLastField(); + if (last_field.isNull()) + return false; + + size_t leaf_num_dimensions = leaf->data.getNumberOfDimensions(); + size_t entry_num_dimensions = entry->data.getNumberOfDimensions(); + + auto default_scalar = entry_num_dimensions > leaf_num_dimensions + ? createEmptyArrayField(entry_num_dimensions - leaf_num_dimensions) + : entry->data.getLeastCommonTypeBase()->getDefault(); + + auto default_field = applyVisitor(FieldVisitorReplaceScalars(default_scalar, leaf_num_dimensions), last_field); + entry->data.insert(std::move(default_field)); + return true; +} + +PathsInData ColumnObjectDeprecated::getKeys() const +{ + PathsInData keys; + keys.reserve(subcolumns.size()); + for (const auto & entry : subcolumns) + keys.emplace_back(entry->path); + return keys; +} + +bool ColumnObjectDeprecated::isFinalized() const +{ + return std::all_of(subcolumns.begin(), subcolumns.end(), + [](const auto & entry) { return entry->data.isFinalized(); }); +} + +void ColumnObjectDeprecated::finalize() +{ + size_t old_size = size(); + Subcolumns new_subcolumns; + for (auto && entry : subcolumns) + { + const auto & least_common_type = entry->data.getLeastCommonType(); + + /// Do not add subcolumns, which consist only from NULLs. + if (isNothing(getBaseTypeOfArray(least_common_type))) + continue; + + entry->data.finalize(); + new_subcolumns.add(entry->path, entry->data); + } + + /// If all subcolumns were skipped add a dummy subcolumn, + /// because Tuple type must have at least one element. + if (new_subcolumns.empty()) + new_subcolumns.add(PathInData{COLUMN_NAME_DUMMY}, Subcolumn{ColumnUInt8::create(old_size, 0), is_nullable}); + + std::swap(subcolumns, new_subcolumns); + checkObjectHasNoAmbiguosPaths(getKeys()); +} + +void ColumnObjectDeprecated::updateHashFast(SipHash & hash) const +{ + for (const auto & entry : subcolumns) + for (auto & part : entry->data.data) + part->updateHashFast(hash); +} + +} diff --git a/src/Columns/ColumnObjectDeprecated.h b/src/Columns/ColumnObjectDeprecated.h new file mode 100644 index 00000000000..29e2d8f0709 --- /dev/null +++ b/src/Columns/ColumnObjectDeprecated.h @@ -0,0 +1,275 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + +/// Info that represents a scalar or array field in a decomposed view. +/// It allows to recreate field with different number +/// of dimensions or nullability. +struct FieldInfo +{ + /// The common type of of all scalars in field. + DataTypePtr scalar_type; + + /// Do we have NULL scalar in field. + bool have_nulls; + + /// If true then we have scalars with different types in array and + /// we need to convert scalars to the common type. + bool need_convert; + + /// Number of dimension in array. 0 if field is scalar. + size_t num_dimensions; + + /// If true then this field is an array of variadic dimension field + /// and we need to normalize the dimension + bool need_fold_dimension; +}; + +FieldInfo getFieldInfo(const Field & field); + +/** A column that represents object with dynamic set of subcolumns. + * Subcolumns are identified by paths in document and are stored in + * a trie-like structure. ColumnObjectDeprecated is not suitable for writing into tables + * and it should be converted to Tuple with fixed set of subcolumns before that. + */ +class ColumnObjectDeprecated final : public COWHelper, ColumnObjectDeprecated> +{ +public: + /** Class that represents one subcolumn. + * It stores values in several parts of column + * and keeps current common type of all parts. + * We add a new column part with a new type, when we insert a field, + * which can't be converted to the current common type. + * After insertion of all values subcolumn should be finalized + * for writing and other operations. + */ + class Subcolumn + { + public: + Subcolumn() = default; + Subcolumn(size_t size_, bool is_nullable_); + Subcolumn(MutableColumnPtr && data_, bool is_nullable_); + + size_t size() const; + size_t byteSize() const; + size_t allocatedBytes() const; + void get(size_t n, Field & res) const; + + bool isFinalized() const; + const DataTypePtr & getLeastCommonType() const { return least_common_type.get(); } + const DataTypePtr & getLeastCommonTypeBase() const { return least_common_type.getBase(); } + size_t getNumberOfDimensions() const { return least_common_type.getNumberOfDimensions(); } + + /// Checks the consistency of column's parts stored in @data. + void checkTypes() const; + + /// Inserts a field, which scalars can be arbitrary, but number of + /// dimensions should be consistent with current common type. + void insert(Field field); + void insert(Field field, FieldInfo info); + + void insertDefault(); + void insertManyDefaults(size_t length); + void insertRangeFrom(const Subcolumn & src, size_t start, size_t length); + void popBack(size_t n); + + Subcolumn cut(size_t start, size_t length) const; + + /// Converts all column's parts to the common type and + /// creates a single column that stores all values. + void finalize(); + + /// Returns last inserted field. + Field getLastField() const; + + FieldInfo getFieldInfo() const; + + /// Recreates subcolumn with default scalar values and keeps sizes of arrays. + /// Used to create columns of type Nested with consistent array sizes. + Subcolumn recreateWithDefaultValues(const FieldInfo & field_info) const; + + /// Returns single column if subcolumn in finalizes. + /// Otherwise -- undefined behaviour. + IColumn & getFinalizedColumn(); + const IColumn & getFinalizedColumn() const; + const ColumnPtr & getFinalizedColumnPtr() const; + + const std::vector & getData() const { return data; } + size_t getNumberOfDefaultsInPrefix() const { return num_of_defaults_in_prefix; } + + friend class ColumnObjectDeprecated; + + private: + class LeastCommonType + { + public: + LeastCommonType(); + explicit LeastCommonType(DataTypePtr type_); + + const DataTypePtr & get() const { return type; } + const DataTypePtr & getBase() const { return base_type; } + size_t getNumberOfDimensions() const { return num_dimensions; } + + private: + DataTypePtr type; + DataTypePtr base_type; + size_t num_dimensions = 0; + }; + + void addNewColumnPart(DataTypePtr type); + + /// Current least common type of all values inserted to this subcolumn. + LeastCommonType least_common_type; + + /// If true then common type type of subcolumn is Nullable + /// and default values are NULLs. + bool is_nullable = false; + + /// Parts of column. Parts should be in increasing order in terms of subtypes/supertypes. + /// That means that the least common type for i-th prefix is the type of i-th part + /// and it's the supertype for all type of column from 0 to i-1. + std::vector data; + + /// Until we insert any non-default field we don't know further + /// least common type and we count number of defaults in prefix, + /// which will be converted to the default type of final common type. + size_t num_of_defaults_in_prefix = 0; + + size_t num_rows = 0; + }; + + using Subcolumns = SubcolumnsTree; + +private: + /// If true then all subcolumns are nullable. + const bool is_nullable; + + Subcolumns subcolumns; + size_t num_rows; + +public: + static constexpr auto COLUMN_NAME_DUMMY = "_dummy"; + + explicit ColumnObjectDeprecated(bool is_nullable_); + ColumnObjectDeprecated(Subcolumns && subcolumns_, bool is_nullable_); + + /// Checks that all subcolumns have consistent sizes. + void checkConsistency() const; + + bool hasSubcolumn(const PathInData & key) const; + + const Subcolumn & getSubcolumn(const PathInData & key) const; + Subcolumn & getSubcolumn(const PathInData & key); + + void incrementNumRows() { ++num_rows; } + + /// Adds a subcolumn from existing IColumn. + void addSubcolumn(const PathInData & key, MutableColumnPtr && subcolumn); + + /// Adds a subcolumn of specific size with default values. + void addSubcolumn(const PathInData & key, size_t new_size); + + /// Adds a subcolumn of type Nested of specific size with default values. + /// It cares about consistency of sizes of Nested arrays. + void addNestedSubcolumn(const PathInData & key, const FieldInfo & field_info, size_t new_size); + + /// Finds a subcolumn from the same Nested type as @entry and inserts + /// an array with default values with consistent sizes as in Nested type. + bool tryInsertDefaultFromNested(const Subcolumns::NodePtr & entry) const; + bool tryInsertManyDefaultsFromNested(const Subcolumns::NodePtr & entry) const; + + const Subcolumns & getSubcolumns() const { return subcolumns; } + Subcolumns & getSubcolumns() { return subcolumns; } + PathsInData getKeys() const; + + /// Part of interface + + const char * getFamilyName() const override { return "Object"; } + TypeIndex getDataType() const override { return TypeIndex::ObjectDeprecated; } + + size_t size() const override; + size_t byteSize() const override; + size_t allocatedBytes() const override; + void forEachSubcolumn(MutableColumnCallback callback) override; + void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override; + void insert(const Field & field) override; + bool tryInsert(const Field & field) override; + void insertDefault() override; +#if !defined(DEBUG_OR_SANITIZER_BUILD) + void insertFrom(const IColumn & src, size_t n) override; + void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#else + void doInsertFrom(const IColumn & src, size_t n) override; + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#endif + void popBack(size_t length) override; + Field operator[](size_t n) const override; + void get(size_t n, Field & res) const override; + + ColumnPtr permute(const Permutation & perm, size_t limit) const override; + ColumnPtr filter(const Filter & filter, ssize_t result_size_hint) const override; + ColumnPtr index(const IColumn & indexes, size_t limit) const override; + ColumnPtr replicate(const Offsets & offsets) const override; + MutableColumnPtr cloneResized(size_t new_size) const override; + + /// Finalizes all subcolumns. + void finalize() override; + bool isFinalized() const override; + + /// Order of rows in ColumnObjectDeprecated is undefined. + void getPermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation & res) const override; + void updatePermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation &, EqualRanges &) const override {} +#if !defined(DEBUG_OR_SANITIZER_BUILD) + int compareAt(size_t, size_t, const IColumn &, int) const override { return 0; } +#else + int doCompareAt(size_t, size_t, const IColumn &, int) const override { return 0; } +#endif + void getExtremes(Field & min, Field & max) const override; + + /// All other methods throw exception. + + StringRef getDataAt(size_t) const override { throwMustBeConcrete(); } + bool isDefaultAt(size_t) const override { throwMustBeConcrete(); } + void insertData(const char *, size_t) override { throwMustBeConcrete(); } + StringRef serializeValueIntoArena(size_t, Arena &, char const *&) const override { throwMustBeConcrete(); } + char * serializeValueIntoMemory(size_t, char *) const override { throwMustBeConcrete(); } + const char * deserializeAndInsertFromArena(const char *) override { throwMustBeConcrete(); } + const char * skipSerializedInArena(const char *) const override { throwMustBeConcrete(); } + void updateHashWithValue(size_t, SipHash &) const override { throwMustBeConcrete(); } + WeakHash32 getWeakHash32() const override { throwMustBeConcrete(); } + void updateHashFast(SipHash &) const override; + void expand(const Filter &, bool) override { throwMustBeConcrete(); } + bool hasEqualValues() const override { throwMustBeConcrete(); } + size_t byteSizeAt(size_t) const override { throwMustBeConcrete(); } + double getRatioOfDefaultRows(double) const override { throwMustBeConcrete(); } + UInt64 getNumberOfDefaultRows() const override { throwMustBeConcrete(); } + void getIndicesOfNonDefaultRows(Offsets &, size_t, size_t) const override { throwMustBeConcrete(); } + +private: + [[noreturn]] static void throwMustBeConcrete() + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "ColumnObjectDeprecated must be converted to ColumnTuple before use"); + } + + template + MutableColumnPtr applyForSubcolumns(Func && func) const; + + /// It's used to get shared sized of Nested to insert correct default values. + const Subcolumns::Node * getLeafOfTheSameNested(const Subcolumns::NodePtr & entry) const; +}; +} diff --git a/src/Columns/ColumnString.cpp b/src/Columns/ColumnString.cpp index 7cfa2571f5a..00cf3bd9c30 100644 --- a/src/Columns/ColumnString.cpp +++ b/src/Columns/ColumnString.cpp @@ -557,6 +557,26 @@ void ColumnString::reserve(size_t n) offsets.reserve_exact(n); } +size_t ColumnString::capacity() const +{ + return offsets.capacity(); +} + +void ColumnString::prepareForSquashing(const Columns & source_columns) +{ + size_t new_size = size(); + size_t new_chars_size = chars.size(); + for (const auto & source_column : source_columns) + { + const auto & source_string_column = assert_cast(*source_column); + new_size += source_string_column.size(); + new_chars_size += source_string_column.chars.size(); + } + + offsets.reserve_exact(new_size); + chars.reserve_exact(new_chars_size); +} + void ColumnString::shrinkToFit() { chars.shrink_to_fit(); diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h index c1012e1e55e..ec0563b3f00 100644 --- a/src/Columns/ColumnString.h +++ b/src/Columns/ColumnString.h @@ -123,7 +123,7 @@ public: void insert(const Field & x) override { - const String & s = x.get(); + const String & s = x.safeGet(); const size_t old_size = chars.size(); const size_t size_to_append = s.size() + 1; const size_t new_size = old_size + size_to_append; @@ -283,6 +283,8 @@ public: ColumnPtr compress() const override; void reserve(size_t n) override; + size_t capacity() const override; + void prepareForSquashing(const Columns & source_columns) override; void shrinkToFit() override; void getExtremes(Field & min, Field & max) const override; diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index 4fc3f88a87c..e741eb51c68 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -141,7 +141,7 @@ void ColumnTuple::get(size_t n, Field & res) const const size_t tuple_size = columns.size(); res = Tuple(); - Tuple & res_tuple = res.get(); + Tuple & res_tuple = res.safeGet(); res_tuple.reserve(tuple_size); for (size_t i = 0; i < tuple_size; ++i) @@ -169,7 +169,7 @@ void ColumnTuple::insertData(const char *, size_t) void ColumnTuple::insert(const Field & x) { - const auto & tuple = x.get(); + const auto & tuple = x.safeGet(); const size_t tuple_size = columns.size(); if (tuple.size() != tuple_size) @@ -185,7 +185,7 @@ bool ColumnTuple::tryInsert(const Field & x) if (x.getType() != Field::Types::Which::Tuple) return false; - const auto & tuple = x.get(); + const auto & tuple = x.safeGet(); const size_t tuple_size = columns.size(); if (tuple.size() != tuple_size) @@ -595,6 +595,27 @@ void ColumnTuple::reserve(size_t n) getColumn(i).reserve(n); } +size_t ColumnTuple::capacity() const +{ + if (columns.empty()) + return size(); + + return getColumn(0).capacity(); +} + +void ColumnTuple::prepareForSquashing(const Columns & source_columns) +{ + const size_t tuple_size = columns.size(); + for (size_t i = 0; i < tuple_size; ++i) + { + Columns nested_columns; + nested_columns.reserve(source_columns.size()); + for (const auto & source_column : source_columns) + nested_columns.push_back(assert_cast(*source_column).getColumnPtr(i)); + getColumn(i).prepareForSquashing(nested_columns); + } +} + void ColumnTuple::shrinkToFit() { const size_t tuple_size = columns.size(); diff --git a/src/Columns/ColumnTuple.h b/src/Columns/ColumnTuple.h index 16b47a993f6..6968294aef9 100644 --- a/src/Columns/ColumnTuple.h +++ b/src/Columns/ColumnTuple.h @@ -110,6 +110,8 @@ public: void updatePermutationWithCollation(const Collator & collator, IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges& equal_ranges) const override; void reserve(size_t n) override; + size_t capacity() const override; + void prepareForSquashing(const Columns & source_columns) override; void shrinkToFit() override; void ensureOwnership() override; size_t byteSize() const override; diff --git a/src/Columns/ColumnUnique.h b/src/Columns/ColumnUnique.h index d6cb75679be..8a66f4e02ed 100644 --- a/src/Columns/ColumnUnique.h +++ b/src/Columns/ColumnUnique.h @@ -48,6 +48,8 @@ private: ColumnUnique(const ColumnUnique & other); public: + std::string getName() const override { return "Unique(" + getNestedColumn()->getName() + ")"; } + MutableColumnPtr cloneEmpty() const override; const ColumnPtr & getNestedColumn() const override; diff --git a/src/Columns/ColumnVariant.cpp b/src/Columns/ColumnVariant.cpp index de7efb41d19..c6511695f5c 100644 --- a/src/Columns/ColumnVariant.cpp +++ b/src/Columns/ColumnVariant.cpp @@ -476,7 +476,7 @@ void ColumnVariant::insertFromImpl(const DB::IColumn & src_, size_t n, const std } } -void ColumnVariant::insertRangeFromImpl(const DB::IColumn & src_, size_t start, size_t length, const std::vector * global_discriminators_mapping) +void ColumnVariant::insertRangeFromImpl(const DB::IColumn & src_, size_t start, size_t length, const std::vector * global_discriminators_mapping, const Discriminator * skip_discriminator) { const size_t num_variants = variants.size(); const auto & src = assert_cast(src_); @@ -557,9 +557,12 @@ void ColumnVariant::insertRangeFromImpl(const DB::IColumn & src_, size_t start, Discriminator global_discr = src_global_discr; if (global_discriminators_mapping && src_global_discr != NULL_DISCRIMINATOR) global_discr = (*global_discriminators_mapping)[src_global_discr]; - Discriminator local_discr = localDiscriminatorByGlobal(global_discr); - if (nested_length) - variants[local_discr]->insertRangeFrom(*src.variants[src_local_discr], nested_start, nested_length); + if (!skip_discriminator || global_discr != *skip_discriminator) + { + Discriminator local_discr = localDiscriminatorByGlobal(global_discr); + if (nested_length) + variants[local_discr]->insertRangeFrom(*src.variants[src_local_discr], nested_start, nested_length); + } } } @@ -610,7 +613,7 @@ void ColumnVariant::insertRangeFrom(const IColumn & src_, size_t start, size_t l void ColumnVariant::doInsertRangeFrom(const IColumn & src_, size_t start, size_t length) #endif { - insertRangeFromImpl(src_, start, length, nullptr); + insertRangeFromImpl(src_, start, length, nullptr, nullptr); } #if !defined(DEBUG_OR_SANITIZER_BUILD) @@ -627,9 +630,9 @@ void ColumnVariant::insertFrom(const DB::IColumn & src_, size_t n, const std::ve insertFromImpl(src_, n, &global_discriminators_mapping); } -void ColumnVariant::insertRangeFrom(const IColumn & src_, size_t start, size_t length, const std::vector & global_discriminators_mapping) +void ColumnVariant::insertRangeFrom(const IColumn & src_, size_t start, size_t length, const std::vector & global_discriminators_mapping, Discriminator skip_discriminator) { - insertRangeFromImpl(src_, start, length, &global_discriminators_mapping); + insertRangeFromImpl(src_, start, length, &global_discriminators_mapping, &skip_discriminator); } void ColumnVariant::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length, const std::vector & global_discriminators_mapping) @@ -673,6 +676,14 @@ void ColumnVariant::insertManyIntoVariantFrom(DB::ColumnVariant::Discriminator g variants[local_discr]->insertManyFrom(src_, position, length); } +void ColumnVariant::deserializeBinaryIntoVariant(ColumnVariant::Discriminator global_discr, const SerializationPtr & serialization, ReadBuffer & buf, const FormatSettings & format_settings) +{ + auto local_discr = localDiscriminatorByGlobal(global_discr); + serialization->deserializeBinary(*variants[local_discr], buf, format_settings); + getLocalDiscriminators().push_back(local_discr); + getOffsets().push_back(variants[local_discr]->size() - 1); +} + void ColumnVariant::insertDefault() { getLocalDiscriminators().push_back(NULL_DISCRIMINATOR); @@ -942,7 +953,7 @@ ColumnPtr ColumnVariant::index(const IColumn & indexes, size_t limit) const { /// If we have only NULLs, index will take no effect, just return resized column. if (hasOnlyNulls()) - return cloneResized(limit); + return cloneResized(limit == 0 ? indexes.size(): limit); /// Optimization when we have only one non empty variant and no NULLs. /// In this case local_discriminators column is filled with identical values and offsets column @@ -998,8 +1009,16 @@ ColumnPtr ColumnVariant::indexImpl(const PaddedPODArray & indexes, size_t new_variants.reserve(num_variants); for (size_t i = 0; i != num_variants; ++i) { - size_t nested_limit = nested_perms[i].size() == variants[i]->size() ? 0 : nested_perms[i].size(); - new_variants.emplace_back(variants[i]->permute(nested_perms[i], nested_limit)); + /// Check if no values from this variant were selected. + if (nested_perms[i].empty()) + { + new_variants.emplace_back(variants[i]->cloneEmpty()); + } + else + { + size_t nested_limit = nested_perms[i].size() == variants[i]->size() ? 0 : nested_perms[i].size(); + new_variants.emplace_back(variants[i]->permute(nested_perms[i], nested_limit)); + } } /// We cannot use new_offsets column as an offset column, because it became invalid after variants permutation. @@ -1213,9 +1232,7 @@ struct ColumnVariant::ComparatorBase ALWAYS_INLINE int compare(size_t lhs, size_t rhs) const { - int res = parent.compareAt(lhs, rhs, parent, nan_direction_hint); - - return res; + return parent.compareAt(lhs, rhs, parent, nan_direction_hint); } }; @@ -1247,8 +1264,30 @@ void ColumnVariant::updatePermutation(IColumn::PermutationSortDirection directio void ColumnVariant::reserve(size_t n) { - local_discriminators->reserve(n); - offsets->reserve(n); + getLocalDiscriminators().reserve_exact(n); + getOffsets().reserve_exact(n); +} + +void ColumnVariant::prepareForSquashing(const Columns & source_columns) +{ + size_t new_size = size(); + for (const auto & source_column : source_columns) + new_size += source_column->size(); + reserve(new_size); + + for (size_t i = 0; i != variants.size(); ++i) + { + Columns source_variant_columns; + source_variant_columns.reserve(source_columns.size()); + for (const auto & source_column : source_columns) + source_variant_columns.push_back(assert_cast(*source_column).getVariantPtrByGlobalDiscriminator(i)); + getVariantByGlobalDiscriminator(i).prepareForSquashing(source_variant_columns); + } +} + +size_t ColumnVariant::capacity() const +{ + return local_discriminators->capacity(); } void ColumnVariant::ensureOwnership() diff --git a/src/Columns/ColumnVariant.h b/src/Columns/ColumnVariant.h index 34c24b5428d..925eab74af8 100644 --- a/src/Columns/ColumnVariant.h +++ b/src/Columns/ColumnVariant.h @@ -2,6 +2,8 @@ #include #include +#include +#include namespace DB @@ -196,13 +198,15 @@ public: /// Methods for insertion from another Variant but with known mapping between global discriminators. void insertFrom(const IColumn & src_, size_t n, const std::vector & global_discriminators_mapping); - void insertRangeFrom(const IColumn & src_, size_t start, size_t length, const std::vector & global_discriminators_mapping); + /// Don't insert data into variant with skip_discriminator global discriminator, it will be processed separately. + void insertRangeFrom(const IColumn & src_, size_t start, size_t length, const std::vector & global_discriminators_mapping, Discriminator skip_discriminator); void insertManyFrom(const IColumn & src_, size_t position, size_t length, const std::vector & global_discriminators_mapping); /// Methods for insertion into a specific variant. void insertIntoVariantFrom(Discriminator global_discr, const IColumn & src_, size_t n); void insertRangeIntoVariantFrom(Discriminator global_discr, const IColumn & src_, size_t start, size_t length); void insertManyIntoVariantFrom(Discriminator global_discr, const IColumn & src_, size_t position, size_t length); + void deserializeBinaryIntoVariant(Discriminator global_discr, const SerializationPtr & serialization, ReadBuffer & buf, const FormatSettings & format_settings); void insertDefault() override; void insertManyDefaults(size_t length) override; @@ -237,6 +241,8 @@ public: size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const override; void reserve(size_t n) override; + size_t capacity() const override; + void prepareForSquashing(const Columns & source_columns) override; void ensureOwnership() override; size_t byteSize() const override; size_t byteSizeAt(size_t n) const override; @@ -263,6 +269,7 @@ public: ColumnPtr & getVariantPtrByGlobalDiscriminator(size_t discr) { return variants[global_to_local_discriminators.at(discr)]; } const NestedColumns & getVariants() const { return variants; } + NestedColumns & getVariants() { return variants; } const IColumn & getLocalDiscriminatorsColumn() const { return *local_discriminators; } IColumn & getLocalDiscriminatorsColumn() { return *local_discriminators; } @@ -302,6 +309,8 @@ public: return true; } + std::vector getLocalToGlobalDiscriminatorsMapping() const { return local_to_global_discriminators; } + /// Check if we have only 1 non-empty variant and no NULL values, /// and if so, return the discriminator of this non-empty column. std::optional getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls() const; @@ -322,7 +331,7 @@ public: private: void insertFromImpl(const IColumn & src_, size_t n, const std::vector * global_discriminators_mapping); - void insertRangeFromImpl(const IColumn & src_, size_t start, size_t length, const std::vector * global_discriminators_mapping); + void insertRangeFromImpl(const IColumn & src_, size_t start, size_t length, const std::vector * global_discriminators_mapping, const Discriminator * skip_discriminator); void insertManyFromImpl(const IColumn & src_, size_t position, size_t length, const std::vector * global_discriminators_mapping); void initIdentityGlobalToLocalDiscriminatorsMapping(); diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index 2fe5b635bd2..8f81da86375 100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -85,7 +85,7 @@ public: void insertMany(const Field & field, size_t length) override { - data.resize_fill(data.size() + length, static_cast(field.get())); + data.resize_fill(data.size() + length, static_cast(field.safeGet())); } void insertData(const char * pos, size_t) override @@ -180,6 +180,11 @@ public: data.reserve_exact(n); } + size_t capacity() const override + { + return data.capacity(); + } + void shrinkToFit() override { data.shrink_to_fit(); @@ -235,7 +240,7 @@ public: void insert(const Field & x) override { - data.push_back(static_cast(x.get())); + data.push_back(static_cast(x.safeGet())); } bool tryInsert(const DB::Field & x) override; diff --git a/src/Columns/IColumn.cpp b/src/Columns/IColumn.cpp index a189903b617..15e29d1422a 100644 --- a/src/Columns/IColumn.cpp +++ b/src/Columns/IColumn.cpp @@ -11,12 +11,13 @@ #include #include #include -#include +#include #include #include #include #include #include +#include #include #include #include @@ -466,12 +467,13 @@ template class IColumnHelper; template class IColumnHelper; template class IColumnHelper; template class IColumnHelper; -template class IColumnHelper; +template class IColumnHelper; template class IColumnHelper; template class IColumnHelper; template class IColumnHelper; template class IColumnHelper; template class IColumnHelper; +template class IColumnHelper; template class IColumnHelper; diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index f9c1a3e7034..e4fe233ffdf 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -475,6 +475,18 @@ public: /// It affects performance only (not correctness). virtual void reserve(size_t /*n*/) {} + /// Returns the number of elements allocated in reserve. + virtual size_t capacity() const { return size(); } + + /// Reserve memory before squashing all specified source columns into this column. + virtual void prepareForSquashing(const std::vector & source_columns) + { + size_t new_size = size(); + for (const auto & source_column : source_columns) + new_size += source_column->size(); + reserve(new_size); + } + /// Requests the removal of unused capacity. /// It is a non-binding request to reduce the capacity of the underlying container to its size. virtual void shrinkToFit() {} diff --git a/src/Columns/IColumnUnique.h b/src/Columns/IColumnUnique.h index a8e10e5e2b2..52b1bef3009 100644 --- a/src/Columns/IColumnUnique.h +++ b/src/Columns/IColumnUnique.h @@ -73,7 +73,7 @@ public: /// Returns dictionary hash which is SipHash is applied to each row of nested column. virtual UInt128 getHash() const = 0; - const char * getFamilyName() const override { return "ColumnUnique"; } + const char * getFamilyName() const override { return "Unique"; } TypeIndex getDataType() const override { return getNestedColumn()->getDataType(); } void insert(const Field &) override diff --git a/src/Columns/tests/gtest_column_dump_structure.cpp b/src/Columns/tests/gtest_column_dump_structure.cpp index e00c77798c8..d9647147157 100644 --- a/src/Columns/tests/gtest_column_dump_structure.cpp +++ b/src/Columns/tests/gtest_column_dump_structure.cpp @@ -10,7 +10,7 @@ TEST(IColumn, dumpStructure) { auto type_lc = std::make_shared(std::make_shared()); ColumnPtr column_lc = type_lc->createColumn(); - String expected_structure = "ColumnLowCardinality(size = 0, UInt8(size = 0), ColumnUnique(size = 1, String(size = 1)))"; + String expected_structure = "LowCardinality(size = 0, UInt8(size = 0), Unique(size = 1, String(size = 1)))"; std::vector threads; for (size_t i = 0; i < 6; ++i) diff --git a/src/Columns/tests/gtest_column_dynamic.cpp b/src/Columns/tests/gtest_column_dynamic.cpp index a2862b09de1..de76261229d 100644 --- a/src/Columns/tests/gtest_column_dynamic.cpp +++ b/src/Columns/tests/gtest_column_dynamic.cpp @@ -7,28 +7,34 @@ using namespace DB; TEST(ColumnDynamic, CreateEmpty) { - auto column = ColumnDynamic::create(255); + auto column = ColumnDynamic::create(254); ASSERT_TRUE(column->empty()); - ASSERT_EQ(column->getVariantInfo().variant_type->getName(), "Variant()"); - ASSERT_TRUE(column->getVariantInfo().variant_names.empty()); - ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.empty()); + ASSERT_EQ(column->getVariantInfo().variant_type->getName(), "Variant(SharedVariant)"); + ASSERT_EQ(column->getVariantInfo().variant_names.size(), 1); + ASSERT_EQ(column->getVariantInfo().variant_names[0], "SharedVariant"); + ASSERT_EQ(column->getVariantInfo().variant_name_to_discriminator.size(), 1); + ASSERT_EQ(column->getVariantInfo().variant_name_to_discriminator.at("SharedVariant"), 0); + ASSERT_TRUE(column->getVariantColumn().getVariantByGlobalDiscriminator(0).empty()); } TEST(ColumnDynamic, InsertDefault) { - auto column = ColumnDynamic::create(255); + auto column = ColumnDynamic::create(254); column->insertDefault(); ASSERT_TRUE(column->size() == 1); - ASSERT_EQ(column->getVariantInfo().variant_type->getName(), "Variant()"); - ASSERT_TRUE(column->getVariantInfo().variant_names.empty()); - ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.empty()); + ASSERT_EQ(column->getVariantInfo().variant_type->getName(), "Variant(SharedVariant)"); + ASSERT_EQ(column->getVariantInfo().variant_names.size(), 1); + ASSERT_EQ(column->getVariantInfo().variant_names[0], "SharedVariant"); + ASSERT_EQ(column->getVariantInfo().variant_name_to_discriminator.size(), 1); + ASSERT_EQ(column->getVariantInfo().variant_name_to_discriminator.at("SharedVariant"), 0); + ASSERT_TRUE(column->getVariantColumn().getVariantByGlobalDiscriminator(0).empty()); ASSERT_TRUE(column->isNullAt(0)); ASSERT_EQ((*column)[0], Field(Null())); } TEST(ColumnDynamic, InsertFields) { - auto column = ColumnDynamic::create(255); + auto column = ColumnDynamic::create(254); column->insert(Field(42)); column->insert(Field(-42)); column->insert(Field("str1")); @@ -41,16 +47,16 @@ TEST(ColumnDynamic, InsertFields) column->insert(Field(43.43)); ASSERT_TRUE(column->size() == 10); - ASSERT_EQ(column->getVariantInfo().variant_type->getName(), "Variant(Float64, Int8, String)"); - std::vector expected_names = {"Float64", "Int8", "String"}; + ASSERT_EQ(column->getVariantInfo().variant_type->getName(), "Variant(Float64, Int8, SharedVariant, String)"); + std::vector expected_names = {"Float64", "Int8", "SharedVariant", "String"}; ASSERT_EQ(column->getVariantInfo().variant_names, expected_names); - std::unordered_map expected_variant_name_to_discriminator = {{"Float64", 0}, {"Int8", 1}, {"String", 2}}; + std::unordered_map expected_variant_name_to_discriminator = {{"Float64", 0}, {"Int8", 1}, {"SharedVariant", 2}, {"String", 3}}; ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator == expected_variant_name_to_discriminator); } ColumnDynamic::MutablePtr getDynamicWithManyVariants(size_t num_variants, Field tuple_element = Field(42)) { - auto column = ColumnDynamic::create(255); + auto column = ColumnDynamic::create(254); for (size_t i = 0; i != num_variants; ++i) { Tuple tuple; @@ -66,61 +72,71 @@ TEST(ColumnDynamic, InsertFieldsOverflow1) { auto column = getDynamicWithManyVariants(253); - ASSERT_EQ(column->getVariantInfo().variant_names.size(), 253); + ASSERT_EQ(column->getVariantInfo().variant_names.size(), 254); column->insert(Field(42.42)); - ASSERT_EQ(column->getVariantInfo().variant_names.size(), 254); + ASSERT_EQ(column->size(), 254); + ASSERT_EQ(column->getVariantInfo().variant_names.size(), 255); ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.contains("Float64")); column->insert(Field(42)); + ASSERT_EQ(column->size(), 255); ASSERT_EQ(column->getVariantInfo().variant_names.size(), 255); ASSERT_FALSE(column->getVariantInfo().variant_name_to_discriminator.contains("Int8")); - ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_EQ(column->getSharedVariant().size(), 1); Field field = (*column)[column->size() - 1]; - ASSERT_EQ(field, "42"); + ASSERT_EQ(field, 42); column->insert(Field(43)); + ASSERT_EQ(column->size(), 256); ASSERT_EQ(column->getVariantInfo().variant_names.size(), 255); ASSERT_FALSE(column->getVariantInfo().variant_name_to_discriminator.contains("Int8")); - ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_EQ(column->getSharedVariant().size(), 2); field = (*column)[column->size() - 1]; - ASSERT_EQ(field, "43"); + ASSERT_EQ(field, 43); column->insert(Field("str1")); + ASSERT_EQ(column->size(), 257); ASSERT_EQ(column->getVariantInfo().variant_names.size(), 255); ASSERT_FALSE(column->getVariantInfo().variant_name_to_discriminator.contains("Int8")); - ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_FALSE(column->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_EQ(column->getSharedVariant().size(), 3); field = (*column)[column->size() - 1]; ASSERT_EQ(field, "str1"); column->insert(Field(Array({Field(42), Field(43)}))); ASSERT_EQ(column->getVariantInfo().variant_names.size(), 255); ASSERT_FALSE(column->getVariantInfo().variant_name_to_discriminator.contains("Array(Int8)")); - ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_FALSE(column->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_EQ(column->getSharedVariant().size(), 4); field = (*column)[column->size() - 1]; - ASSERT_EQ(field, "[42, 43]"); + ASSERT_EQ(field, Field(Array({Field(42), Field(43)}))); } TEST(ColumnDynamic, InsertFieldsOverflow2) { auto column = getDynamicWithManyVariants(254); - ASSERT_EQ(column->getVariantInfo().variant_names.size(), 254); + ASSERT_EQ(column->getVariantInfo().variant_names.size(), 255); column->insert(Field("str1")); ASSERT_EQ(column->getVariantInfo().variant_names.size(), 255); - ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_FALSE(column->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_EQ(column->getSharedVariant().size(), 1); + Field field = (*column)[column->size() - 1]; + ASSERT_EQ(field, "str1"); column->insert(Field(42)); ASSERT_EQ(column->getVariantInfo().variant_names.size(), 255); ASSERT_FALSE(column->getVariantInfo().variant_name_to_discriminator.contains("Int8")); - ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.contains("String")); - Field field = (*column)[column->size() - 1]; - ASSERT_EQ(field, "42"); + ASSERT_FALSE(column->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_EQ(column->getSharedVariant().size(), 2); + field = (*column)[column->size() - 1]; + ASSERT_EQ(field, 42); } ColumnDynamic::MutablePtr getInsertFromColumn(size_t num = 1) { - auto column_from = ColumnDynamic::create(255); + auto column_from = ColumnDynamic::create(254); for (size_t i = 0; i != num; ++i) { column_from->insert(Field(42)); @@ -154,41 +170,41 @@ void checkInsertFrom(const ColumnDynamic::MutablePtr & column_from, ColumnDynami TEST(ColumnDynamic, InsertFrom1) { - auto column_to = ColumnDynamic::create(255); - checkInsertFrom(getInsertFromColumn(), column_to, "Variant(Float64, Int8, String)", {"Float64", "Int8", "String"}, {{"Float64", 0}, {"Int8", 1}, {"String", 2}}); + auto column_to = ColumnDynamic::create(254); + checkInsertFrom(getInsertFromColumn(), column_to, "Variant(Float64, Int8, SharedVariant, String)", {"Float64", "Int8", "SharedVariant", "String"}, {{"Float64", 0}, {"Int8", 1}, {"SharedVariant", 2}, {"String", 3}}); } TEST(ColumnDynamic, InsertFrom2) { - auto column_to = ColumnDynamic::create(255); + auto column_to = ColumnDynamic::create(254); column_to->insert(Field(42)); column_to->insert(Field(42.42)); column_to->insert(Field("str")); - checkInsertFrom(getInsertFromColumn(), column_to, "Variant(Float64, Int8, String)", {"Float64", "Int8", "String"}, {{"Float64", 0}, {"Int8", 1}, {"String", 2}}); + checkInsertFrom(getInsertFromColumn(), column_to, "Variant(Float64, Int8, SharedVariant, String)", {"Float64", "Int8", "SharedVariant", "String"}, {{"Float64", 0}, {"Int8", 1}, {"SharedVariant", 2}, {"String", 3}}); } TEST(ColumnDynamic, InsertFrom3) { - auto column_to = ColumnDynamic::create(255); + auto column_to = ColumnDynamic::create(254); column_to->insert(Field(42)); column_to->insert(Field(42.42)); column_to->insert(Field("str")); column_to->insert(Array({Field(42)})); - checkInsertFrom(getInsertFromColumn(), column_to, "Variant(Array(Int8), Float64, Int8, String)", {"Array(Int8)", "Float64", "Int8", "String"}, {{"Array(Int8)", 0}, {"Float64", 1}, {"Int8", 2}, {"String", 3}}); + checkInsertFrom(getInsertFromColumn(), column_to, "Variant(Array(Int8), Float64, Int8, SharedVariant, String)", {"Array(Int8)", "Float64", "Int8", "SharedVariant", "String"}, {{"Array(Int8)", 0}, {"Float64", 1}, {"Int8", 2}, {"SharedVariant", 3}, {"String", 4}}); } TEST(ColumnDynamic, InsertFromOverflow1) { - auto column_from = ColumnDynamic::create(255); + auto column_from = ColumnDynamic::create(254); column_from->insert(Field(42)); column_from->insert(Field(42.42)); column_from->insert(Field("str")); auto column_to = getDynamicWithManyVariants(253); column_to->insertFrom(*column_from, 0); - ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 254); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); auto field = (*column_to)[column_to->size() - 1]; ASSERT_EQ(field, 42); @@ -196,20 +212,22 @@ TEST(ColumnDynamic, InsertFromOverflow1) column_to->insertFrom(*column_from, 1); ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); - ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_EQ(column_to->getSharedVariant().size(), 1); field = (*column_to)[column_to->size() - 1]; - ASSERT_EQ(field, "42.42"); + ASSERT_EQ(field, 42.42); column_to->insertFrom(*column_from, 2); ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); - ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_EQ(column_to->getSharedVariant().size(), 2); field = (*column_to)[column_to->size() - 1]; ASSERT_EQ(field, "str"); } TEST(ColumnDynamic, InsertFromOverflow2) { - auto column_from = ColumnDynamic::create(255); + auto column_from = ColumnDynamic::create(254); column_from->insert(Field(42)); column_from->insert(Field(42.42)); @@ -221,9 +239,32 @@ TEST(ColumnDynamic, InsertFromOverflow2) column_to->insertFrom(*column_from, 1); ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); - ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_EQ(column_to->getSharedVariant().size(), 1); field = (*column_to)[column_to->size() - 1]; - ASSERT_EQ(field, "42.42"); + ASSERT_EQ(field, 42.42); +} + +TEST(ColumnDynamic, InsertFromOverflow3) +{ + auto column_from = ColumnDynamic::create(1); + column_from->insert(Field(42)); + column_from->insert(Field(42.42)); + + auto column_to = ColumnDynamic::create(254); + column_to->insert(Field(41)); + + column_to->insertFrom(*column_from, 0); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_EQ(column_to->getSharedVariant().size(), 0); + auto field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, 42); + + column_to->insertFrom(*column_from, 1); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_EQ(column_to->getSharedVariant().size(), 1); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, 42.42); } void checkInsertManyFrom(const ColumnDynamic::MutablePtr & column_from, ColumnDynamic::MutablePtr & column_to, const std::string & expected_variant, const std::vector & expected_names, const std::unordered_map & expected_variant_name_to_discriminator) @@ -256,42 +297,43 @@ void checkInsertManyFrom(const ColumnDynamic::MutablePtr & column_from, ColumnDy TEST(ColumnDynamic, InsertManyFrom1) { - auto column_to = ColumnDynamic::create(255); - checkInsertManyFrom(getInsertFromColumn(), column_to, "Variant(Float64, Int8, String)", {"Float64", "Int8", "String"}, {{"Float64", 0}, {"Int8", 1}, {"String", 2}}); + auto column_to = ColumnDynamic::create(254); + checkInsertManyFrom(getInsertFromColumn(), column_to, "Variant(Float64, Int8, SharedVariant, String)", {"Float64", "Int8", "SharedVariant", "String"}, {{"Float64", 0}, {"Int8", 1}, {"SharedVariant", 2}, {"String", 3}}); } TEST(ColumnDynamic, InsertManyFrom2) { - auto column_to = ColumnDynamic::create(255); + auto column_to = ColumnDynamic::create(254); column_to->insert(Field(42)); column_to->insert(Field(42.42)); column_to->insert(Field("str")); - checkInsertManyFrom(getInsertFromColumn(), column_to, "Variant(Float64, Int8, String)", {"Float64", "Int8", "String"}, {{"Float64", 0}, {"Int8", 1}, {"String", 2}}); + checkInsertManyFrom(getInsertFromColumn(), column_to, "Variant(Float64, Int8, SharedVariant, String)", {"Float64", "Int8", "SharedVariant", "String"}, {{"Float64", 0}, {"Int8", 1}, {"SharedVariant", 2}, {"String", 3}}); } TEST(ColumnDynamic, InsertManyFrom3) { - auto column_to = ColumnDynamic::create(255); + auto column_to = ColumnDynamic::create(254); column_to->insert(Field(42)); column_to->insert(Field(42.42)); column_to->insert(Field("str")); column_to->insert(Array({Field(42)})); - checkInsertManyFrom(getInsertFromColumn(), column_to, "Variant(Array(Int8), Float64, Int8, String)", {"Array(Int8)", "Float64", "Int8", "String"}, {{"Array(Int8)", 0}, {"Float64", 1}, {"Int8", 2}, {"String", 3}}); + checkInsertManyFrom(getInsertFromColumn(), column_to, "Variant(Array(Int8), Float64, Int8, SharedVariant, String)", {"Array(Int8)", "Float64", "Int8", "SharedVariant", "String"}, {{"Array(Int8)", 0}, {"Float64", 1}, {"Int8", 2}, {"SharedVariant", 3}, {"String", 4}}); } TEST(ColumnDynamic, InsertManyFromOverflow1) { - auto column_from = ColumnDynamic::create(255); + auto column_from = ColumnDynamic::create(254); column_from->insert(Field(42)); column_from->insert(Field(42.42)); column_from->insert(Field("str")); auto column_to = getDynamicWithManyVariants(253); column_to->insertManyFrom(*column_from, 0, 2); - ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 254); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_EQ(column_to->getSharedVariant().size(), 0); auto field = (*column_to)[column_to->size() - 2]; ASSERT_EQ(field, 42); field = (*column_to)[column_to->size() - 1]; @@ -300,15 +342,17 @@ TEST(ColumnDynamic, InsertManyFromOverflow1) column_to->insertManyFrom(*column_from, 1, 2); ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); - ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_EQ(column_to->getSharedVariant().size(), 2); field = (*column_to)[column_to->size() - 2]; - ASSERT_EQ(field, "42.42"); + ASSERT_EQ(field, 42.42); field = (*column_to)[column_to->size() - 1]; - ASSERT_EQ(field, "42.42"); + ASSERT_EQ(field, 42.42); column_to->insertManyFrom(*column_from, 2, 2); ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); - ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_EQ(column_to->getSharedVariant().size(), 4); field = (*column_to)[column_to->size() - 1]; ASSERT_EQ(field, "str"); field = (*column_to)[column_to->size() - 2]; @@ -317,14 +361,15 @@ TEST(ColumnDynamic, InsertManyFromOverflow1) TEST(ColumnDynamic, InsertManyFromOverflow2) { - auto column_from = ColumnDynamic::create(255); + auto column_from = ColumnDynamic::create(254); column_from->insert(Field(42)); column_from->insert(Field(42.42)); auto column_to = getDynamicWithManyVariants(253); column_to->insertManyFrom(*column_from, 0, 2); - ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 254); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_EQ(column_to->getSharedVariant().size(), 0); auto field = (*column_to)[column_to->size() - 2]; ASSERT_EQ(field, 42); field = (*column_to)[column_to->size() - 1]; @@ -333,11 +378,39 @@ TEST(ColumnDynamic, InsertManyFromOverflow2) column_to->insertManyFrom(*column_from, 1, 2); ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); - ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_EQ(column_to->getSharedVariant().size(), 2); field = (*column_to)[column_to->size() - 2]; - ASSERT_EQ(field, "42.42"); + ASSERT_EQ(field, 42.42); field = (*column_to)[column_to->size() - 1]; - ASSERT_EQ(field, "42.42"); + ASSERT_EQ(field, 42.42); +} + + +TEST(ColumnDynamic, InsertManyFromOverflow3) +{ + auto column_from = ColumnDynamic::create(1); + column_from->insert(Field(42)); + column_from->insert(Field(42.42)); + + auto column_to = ColumnDynamic::create(254); + column_to->insert(Field(41)); + + column_to->insertManyFrom(*column_from, 0, 2); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_EQ(column_to->getSharedVariant().size(), 0); + auto field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, 42); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, 42); + + column_to->insertManyFrom(*column_from, 1, 2); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_EQ(column_to->getSharedVariant().size(), 2); + field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, 42.42); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, 42.42); } void checkInsertRangeFrom(const ColumnDynamic::MutablePtr & column_from, ColumnDynamic::MutablePtr & column_to, const std::string & expected_variant, const std::vector & expected_names, const std::unordered_map & expected_variant_name_to_discriminator) @@ -368,34 +441,34 @@ void checkInsertRangeFrom(const ColumnDynamic::MutablePtr & column_from, ColumnD TEST(ColumnDynamic, InsertRangeFrom1) { - auto column_to = ColumnDynamic::create(255); - checkInsertRangeFrom(getInsertFromColumn(2), column_to, "Variant(Float64, Int8, String)", {"Float64", "Int8", "String"}, {{"Float64", 0}, {"Int8", 1}, {"String", 2}}); + auto column_to = ColumnDynamic::create(254); + checkInsertRangeFrom(getInsertFromColumn(2), column_to, "Variant(Float64, Int8, SharedVariant, String)", {"Float64", "Int8", "SharedVariant", "String"}, {{"Float64", 0}, {"Int8", 1}, {"SharedVariant", 2}, {"String", 3}}); } TEST(ColumnDynamic, InsertRangeFrom2) { - auto column_to = ColumnDynamic::create(255); + auto column_to = ColumnDynamic::create(254); column_to->insert(Field(42)); column_to->insert(Field(42.42)); column_to->insert(Field("str1")); - checkInsertRangeFrom(getInsertFromColumn(2), column_to, "Variant(Float64, Int8, String)", {"Float64", "Int8", "String"}, {{"Float64", 0}, {"Int8", 1}, {"String", 2}}); + checkInsertRangeFrom(getInsertFromColumn(2), column_to, "Variant(Float64, Int8, SharedVariant, String)", {"Float64", "Int8", "SharedVariant", "String"}, {{"Float64", 0}, {"Int8", 1}, {"SharedVariant", 2}, {"String", 3}}); } TEST(ColumnDynamic, InsertRangeFrom3) { - auto column_to = ColumnDynamic::create(255); + auto column_to = ColumnDynamic::create(254); column_to->insert(Field(42)); column_to->insert(Field(42.42)); column_to->insert(Field("str1")); column_to->insert(Array({Field(42)})); - checkInsertRangeFrom(getInsertFromColumn(2), column_to, "Variant(Array(Int8), Float64, Int8, String)", {"Array(Int8)", "Float64", "Int8", "String"}, {{"Array(Int8)", 0}, {"Float64", 1}, {"Int8", 2}, {"String", 3}}); + checkInsertRangeFrom(getInsertFromColumn(2), column_to, "Variant(Array(Int8), Float64, Int8, SharedVariant, String)", {"Array(Int8)", "Float64", "Int8", "SharedVariant", "String"}, {{"Array(Int8)", 0}, {"Float64", 1}, {"Int8", 2}, {"SharedVariant", 3}, {"String", 4}}); } TEST(ColumnDynamic, InsertRangeFromOverflow1) { - auto column_from = ColumnDynamic::create(255); + auto column_from = ColumnDynamic::create(254); column_from->insert(Field(42)); column_from->insert(Field(43)); column_from->insert(Field(42.42)); @@ -403,23 +476,25 @@ TEST(ColumnDynamic, InsertRangeFromOverflow1) auto column_to = getDynamicWithManyVariants(253); column_to->insertRangeFrom(*column_from, 0, 4); + ASSERT_EQ(column_to->size(), 257); ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); - ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_EQ(column_to->getSharedVariant().size(), 2); auto field = (*column_to)[column_to->size() - 4]; ASSERT_EQ(field, Field(42)); field = (*column_to)[column_to->size() - 3]; ASSERT_EQ(field, Field(43)); field = (*column_to)[column_to->size() - 2]; - ASSERT_EQ(field, Field("42.42")); + ASSERT_EQ(field, Field(42.42)); field = (*column_to)[column_to->size() - 1]; ASSERT_EQ(field, Field("str")); } TEST(ColumnDynamic, InsertRangeFromOverflow2) { - auto column_from = ColumnDynamic::create(255); + auto column_from = ColumnDynamic::create(254); column_from->insert(Field(42)); column_from->insert(Field(43)); column_from->insert(Field(42.42)); @@ -428,19 +503,20 @@ TEST(ColumnDynamic, InsertRangeFromOverflow2) column_to->insertRangeFrom(*column_from, 0, 3); ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); - ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_EQ(column_to->getSharedVariant().size(), 1); auto field = (*column_to)[column_to->size() - 3]; ASSERT_EQ(field, Field(42)); field = (*column_to)[column_to->size() - 2]; ASSERT_EQ(field, Field(43)); field = (*column_to)[column_to->size() - 1]; - ASSERT_EQ(field, Field("42.42")); + ASSERT_EQ(field, Field(42.42)); } TEST(ColumnDynamic, InsertRangeFromOverflow3) { - auto column_from = ColumnDynamic::create(255); + auto column_from = ColumnDynamic::create(254); column_from->insert(Field(42)); column_from->insert(Field(43)); column_from->insert(Field(42.42)); @@ -449,20 +525,21 @@ TEST(ColumnDynamic, InsertRangeFromOverflow3) column_to->insert(Field("Str")); column_to->insertRangeFrom(*column_from, 0, 3); ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); - ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_EQ(column_to->getSharedVariant().size(), 3); auto field = (*column_to)[column_to->size() - 3]; ASSERT_EQ(field, Field(42)); field = (*column_to)[column_to->size() - 2]; ASSERT_EQ(field, Field(43)); field = (*column_to)[column_to->size() - 1]; - ASSERT_EQ(field, Field("42.42")); + ASSERT_EQ(field, Field(42.42)); } TEST(ColumnDynamic, InsertRangeFromOverflow4) { - auto column_from = ColumnDynamic::create(255); + auto column_from = ColumnDynamic::create(254); column_from->insert(Field(42)); column_from->insert(Field(42.42)); column_from->insert(Field("str")); @@ -471,19 +548,20 @@ TEST(ColumnDynamic, InsertRangeFromOverflow4) column_to->insertRangeFrom(*column_from, 0, 3); ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); - ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_EQ(column_to->getSharedVariant().size(), 3); auto field = (*column_to)[column_to->size() - 3]; - ASSERT_EQ(field, Field("42")); + ASSERT_EQ(field, Field(42)); field = (*column_to)[column_to->size() - 2]; - ASSERT_EQ(field, Field("42.42")); + ASSERT_EQ(field, Field(42.42)); field = (*column_to)[column_to->size() - 1]; ASSERT_EQ(field, Field("str")); } TEST(ColumnDynamic, InsertRangeFromOverflow5) { - auto column_from = ColumnDynamic::create(255); + auto column_from = ColumnDynamic::create(254); column_from->insert(Field(42)); column_from->insert(Field(43)); column_from->insert(Field(42.42)); @@ -493,22 +571,23 @@ TEST(ColumnDynamic, InsertRangeFromOverflow5) column_to->insert(Field("str")); column_to->insertRangeFrom(*column_from, 0, 4); ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); - ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_EQ(column_to->getSharedVariant().size(), 3); auto field = (*column_to)[column_to->size() - 4]; ASSERT_EQ(field, Field(42)); field = (*column_to)[column_to->size() - 3]; ASSERT_EQ(field, Field(43)); field = (*column_to)[column_to->size() - 2]; - ASSERT_EQ(field, Field("42.42")); + ASSERT_EQ(field, Field(42.42)); field = (*column_to)[column_to->size() - 1]; ASSERT_EQ(field, Field("str")); } TEST(ColumnDynamic, InsertRangeFromOverflow6) { - auto column_from = ColumnDynamic::create(255); + auto column_from = ColumnDynamic::create(254); column_from->insert(Field(42)); column_from->insert(Field(43)); column_from->insert(Field(44)); @@ -520,13 +599,14 @@ TEST(ColumnDynamic, InsertRangeFromOverflow6) auto column_to = getDynamicWithManyVariants(253); column_to->insertRangeFrom(*column_from, 2, 5); ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); - ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); - ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); - ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Array(Int8)")); + ASSERT_EQ(column_to->getSharedVariant().size(), 4); auto field = (*column_to)[column_to->size() - 5]; - ASSERT_EQ(field, Field("44")); + ASSERT_EQ(field, Field(44)); field = (*column_to)[column_to->size() - 4]; ASSERT_EQ(field, Field(42.42)); field = (*column_to)[column_to->size() - 3]; @@ -534,12 +614,136 @@ TEST(ColumnDynamic, InsertRangeFromOverflow6) field = (*column_to)[column_to->size() - 2]; ASSERT_EQ(field, Field("str")); field = (*column_to)[column_to->size() - 1]; - ASSERT_EQ(field, Field("[42]")); + ASSERT_EQ(field, Field(Array({Field(42)}))); +} + +TEST(ColumnDynamic, InsertRangeFromOverflow7) +{ + auto column_from = ColumnDynamic::create(2); + column_from->insert(Field(42.42)); + column_from->insert(Field("str1")); + column_from->insert(Field(42)); + column_from->insert(Field(43.43)); + column_from->insert(Field(Array({Field(41)}))); + column_from->insert(Field(43)); + column_from->insert(Field("str2")); + column_from->insert(Field(Array({Field(42)}))); + + auto column_to = ColumnDynamic::create(254); + column_to->insert(Field(42)); + + column_to->insertRangeFrom(*column_from, 0, 8); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 4); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Array(Int8)")); + ASSERT_EQ(column_to->getSharedVariant().size(), 2); + auto field = (*column_to)[column_to->size() - 8]; + ASSERT_EQ(field, Field(42.42)); + field = (*column_to)[column_to->size() - 7]; + ASSERT_EQ(field, Field("str1")); + field = (*column_to)[column_to->size() - 6]; + ASSERT_EQ(field, Field(42)); + field = (*column_to)[column_to->size() - 5]; + ASSERT_EQ(field, Field(43.43)); + field = (*column_to)[column_to->size() - 4]; + ASSERT_EQ(field, Field(Array({Field(41)}))); + field = (*column_to)[column_to->size() - 3]; + ASSERT_EQ(field, Field(43)); + field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, Field("str2")); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, Field(Array({Field(42)}))); +} + +TEST(ColumnDynamic, InsertRangeFromOverflow8) +{ + auto column_from = ColumnDynamic::create(2); + column_from->insert(Field(42.42)); + column_from->insert(Field("str1")); + column_from->insert(Field(42)); + column_from->insert(Field(43.43)); + column_from->insert(Field(Array({Field(41)}))); + column_from->insert(Field(43)); + column_from->insert(Field("str2")); + column_from->insert(Field(Array({Field(42)}))); + + auto column_to = ColumnDynamic::create(2); + column_to->insert(Field(42)); + column_from->insert(Field("str1")); + + column_to->insertRangeFrom(*column_from, 0, 8); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 3); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Array(Int8)")); + ASSERT_EQ(column_to->getSharedVariant().size(), 4); + auto field = (*column_to)[column_to->size() - 8]; + ASSERT_EQ(field, Field(42.42)); + field = (*column_to)[column_to->size() - 7]; + ASSERT_EQ(field, Field("str1")); + field = (*column_to)[column_to->size() - 6]; + ASSERT_EQ(field, Field(42)); + field = (*column_to)[column_to->size() - 5]; + ASSERT_EQ(field, Field(43.43)); + field = (*column_to)[column_to->size() - 4]; + ASSERT_EQ(field, Field(Array({Field(41)}))); + field = (*column_to)[column_to->size() - 3]; + ASSERT_EQ(field, Field(43)); + field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, Field("str2")); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, Field(Array({Field(42)}))); +} + +TEST(ColumnDynamic, InsertRangeFromOverflow9) +{ + auto column_from = ColumnDynamic::create(3); + column_from->insert(Field("str1")); + column_from->insert(Field(42.42)); + column_from->insert(Field("str2")); + column_from->insert(Field(42)); + column_from->insert(Field(43.43)); + column_from->insert(Field(Array({Field(41)}))); + column_from->insert(Field(43)); + column_from->insert(Field("str2")); + column_from->insert(Field(Array({Field(42)}))); + + auto column_to = ColumnDynamic::create(2); + column_to->insert(Field(42)); + + column_to->insertRangeFrom(*column_from, 0, 9); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 3); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Array(Int8)")); + ASSERT_EQ(column_to->getSharedVariant().size(), 4); + auto field = (*column_to)[column_to->size() - 9]; + ASSERT_EQ(field, Field("str1")); + field = (*column_to)[column_to->size() - 8]; + ASSERT_EQ(field, Field(42.42)); + field = (*column_to)[column_to->size() - 7]; + ASSERT_EQ(field, Field("str2")); + field = (*column_to)[column_to->size() - 6]; + ASSERT_EQ(field, Field(42)); + field = (*column_to)[column_to->size() - 5]; + ASSERT_EQ(field, Field(43.43)); + field = (*column_to)[column_to->size() - 4]; + ASSERT_EQ(field, Field(Array({Field(41)}))); + field = (*column_to)[column_to->size() - 3]; + ASSERT_EQ(field, Field(43)); + field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, Field("str2")); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, Field(Array({Field(42)}))); } TEST(ColumnDynamic, SerializeDeserializeFromArena1) { - auto column = ColumnDynamic::create(255); + auto column = ColumnDynamic::create(254); column->insert(Field(42)); column->insert(Field(42.42)); column->insert(Field("str")); @@ -564,7 +768,7 @@ TEST(ColumnDynamic, SerializeDeserializeFromArena1) TEST(ColumnDynamic, SerializeDeserializeFromArena2) { - auto column_from = ColumnDynamic::create(255); + auto column_from = ColumnDynamic::create(254); column_from->insert(Field(42)); column_from->insert(Field(42.42)); column_from->insert(Field("str")); @@ -577,26 +781,26 @@ TEST(ColumnDynamic, SerializeDeserializeFromArena2) column_from->serializeValueIntoArena(2, arena, pos); column_from->serializeValueIntoArena(3, arena, pos); - auto column_to = ColumnDynamic::create(255); + auto column_to = ColumnDynamic::create(254); pos = column_to->deserializeAndInsertFromArena(ref1.data); pos = column_to->deserializeAndInsertFromArena(pos); pos = column_to->deserializeAndInsertFromArena(pos); column_to->deserializeAndInsertFromArena(pos); - ASSERT_EQ((*column_from)[column_from->size() - 4], 42); - ASSERT_EQ((*column_from)[column_from->size() - 3], 42.42); - ASSERT_EQ((*column_from)[column_from->size() - 2], "str"); - ASSERT_EQ((*column_from)[column_from->size() - 1], Null()); - ASSERT_EQ(column_to->getVariantInfo().variant_type->getName(), "Variant(Float64, Int8, String)"); - std::vector expected_names = {"Float64", "Int8", "String"}; + ASSERT_EQ((*column_to)[column_to->size() - 4], 42); + ASSERT_EQ((*column_to)[column_to->size() - 3], 42.42); + ASSERT_EQ((*column_to)[column_to->size() - 2], "str"); + ASSERT_EQ((*column_to)[column_to->size() - 1], Null()); + ASSERT_EQ(column_to->getVariantInfo().variant_type->getName(), "Variant(Float64, Int8, SharedVariant, String)"); + std::vector expected_names = {"Float64", "Int8", "SharedVariant", "String"}; ASSERT_EQ(column_to->getVariantInfo().variant_names, expected_names); - std::unordered_map expected_variant_name_to_discriminator = {{"Float64", 0}, {"Int8", 1}, {"String", 2}}; + std::unordered_map expected_variant_name_to_discriminator = {{"Float64", 0}, {"Int8", 1}, {"SharedVariant", 2}, {"String", 3}}; ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator == expected_variant_name_to_discriminator); } -TEST(ColumnDynamic, SerializeDeserializeFromArenaOverflow) +TEST(ColumnDynamic, SerializeDeserializeFromArenaOverflow1) { - auto column_from = ColumnDynamic::create(255); + auto column_from = ColumnDynamic::create(254); column_from->insert(Field(42)); column_from->insert(Field(42.42)); column_from->insert(Field("str")); @@ -615,18 +819,56 @@ TEST(ColumnDynamic, SerializeDeserializeFromArenaOverflow) pos = column_to->deserializeAndInsertFromArena(pos); column_to->deserializeAndInsertFromArena(pos); - ASSERT_EQ((*column_from)[column_from->size() - 4], 42); - ASSERT_EQ((*column_from)[column_from->size() - 3], 42.42); - ASSERT_EQ((*column_from)[column_from->size() - 2], "str"); - ASSERT_EQ((*column_from)[column_from->size() - 1], Null()); + ASSERT_EQ((*column_to)[column_to->size() - 4], 42); + ASSERT_EQ((*column_to)[column_to->size() - 3], 42.42); + ASSERT_EQ((*column_to)[column_to->size() - 2], "str"); + ASSERT_EQ((*column_to)[column_to->size() - 1], Null()); ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); - ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_EQ(column_to->getSharedVariant().size(), 2); +} + +TEST(ColumnDynamic, SerializeDeserializeFromArenaOverflow2) +{ + auto column_from = ColumnDynamic::create(2); + column_from->insert(Field(42)); + column_from->insert(Field(42.42)); + column_from->insert(Field("str")); + column_from->insert(Field(Null())); + column_from->insert(Field(Array({Field(42)}))); + + Arena arena; + const char * pos = nullptr; + auto ref1 = column_from->serializeValueIntoArena(0, arena, pos); + column_from->serializeValueIntoArena(1, arena, pos); + column_from->serializeValueIntoArena(2, arena, pos); + column_from->serializeValueIntoArena(3, arena, pos); + column_from->serializeValueIntoArena(4, arena, pos); + + auto column_to = ColumnDynamic::create(2); + column_to->insert(Field(42.42)); + pos = column_to->deserializeAndInsertFromArena(ref1.data); + pos = column_to->deserializeAndInsertFromArena(pos); + pos = column_to->deserializeAndInsertFromArena(pos); + pos = column_to->deserializeAndInsertFromArena(pos); + column_to->deserializeAndInsertFromArena(pos); + + ASSERT_EQ((*column_to)[column_to->size() - 5], 42); + ASSERT_EQ((*column_to)[column_to->size() - 4], 42.42); + ASSERT_EQ((*column_to)[column_to->size() - 3], "str"); + ASSERT_EQ((*column_to)[column_to->size() - 2], Null()); + ASSERT_EQ((*column_to)[column_to->size() - 1], Field(Array({Field(42)}))); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Array(Int8)")); + ASSERT_EQ(column_to->getSharedVariant().size(), 2); } TEST(ColumnDynamic, skipSerializedInArena) { - auto column_from = ColumnDynamic::create(255); + auto column_from = ColumnDynamic::create(3); column_from->insert(Field(42)); column_from->insert(Field(42.42)); column_from->insert(Field("str")); @@ -640,13 +882,41 @@ TEST(ColumnDynamic, skipSerializedInArena) auto ref4 = column_from->serializeValueIntoArena(3, arena, pos); const char * end = ref4.data + ref4.size; - auto column_to = ColumnDynamic::create(255); + auto column_to = ColumnDynamic::create(254); pos = column_to->skipSerializedInArena(ref1.data); pos = column_to->skipSerializedInArena(pos); pos = column_to->skipSerializedInArena(pos); pos = column_to->skipSerializedInArena(pos); ASSERT_EQ(pos, end); - ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.empty()); - ASSERT_TRUE(column_to->getVariantInfo().variant_names.empty()); + ASSERT_EQ(column_to->getVariantInfo().variant_name_to_discriminator.at("SharedVariant"), 0); + ASSERT_EQ(column_to->getVariantInfo().variant_names, Names{"SharedVariant"}); +} + +TEST(ColumnDynamic, compare) +{ + auto column_from = ColumnDynamic::create(3); + column_from->insert(Field(42)); + column_from->insert(Field(42.42)); + column_from->insert(Field("str")); + column_from->insert(Field(Null())); + column_from->insert(Field(Array({Field(42)}))); + + ASSERT_EQ(column_from->compareAt(0, 0, *column_from, -1), 0); + ASSERT_EQ(column_from->compareAt(0, 1, *column_from, -1), 1); + ASSERT_EQ(column_from->compareAt(1, 1, *column_from, -1), 0); + ASSERT_EQ(column_from->compareAt(0, 2, *column_from, -1), -1); + ASSERT_EQ(column_from->compareAt(2, 0, *column_from, -1), 1); + ASSERT_EQ(column_from->compareAt(2, 4, *column_from, -1), 1); + ASSERT_EQ(column_from->compareAt(4, 2, *column_from, -1), -1); + ASSERT_EQ(column_from->compareAt(4, 4, *column_from, -1), 0); + ASSERT_EQ(column_from->compareAt(0, 3, *column_from, -1), 1); + ASSERT_EQ(column_from->compareAt(1, 3, *column_from, -1), 1); + ASSERT_EQ(column_from->compareAt(2, 3, *column_from, -1), 1); + ASSERT_EQ(column_from->compareAt(3, 3, *column_from, -1), 0); + ASSERT_EQ(column_from->compareAt(4, 3, *column_from, -1), 1); + ASSERT_EQ(column_from->compareAt(3, 0, *column_from, -1), -1); + ASSERT_EQ(column_from->compareAt(3, 1, *column_from, -1), -1); + ASSERT_EQ(column_from->compareAt(3, 2, *column_from, -1), -1); + ASSERT_EQ(column_from->compareAt(3, 4, *column_from, -1), -1); } diff --git a/src/Columns/tests/gtest_column_object.cpp b/src/Columns/tests/gtest_column_object.cpp new file mode 100644 index 00000000000..f6a1da64ba3 --- /dev/null +++ b/src/Columns/tests/gtest_column_object.cpp @@ -0,0 +1,351 @@ +#include +#include +#include +#include +#include + +#include +#include + +using namespace DB; + +TEST(ColumnObject, CreateEmpty) +{ + auto type = DataTypeFactory::instance().get("JSON(max_dynamic_types=10, max_dynamic_paths=20, a.b UInt32, a.c Array(String))"); + auto col = type->createColumn(); + const auto & col_object = assert_cast(*col); + const auto & typed_paths = col_object.getTypedPaths(); + ASSERT_TRUE(typed_paths.contains("a.b")); + ASSERT_EQ(typed_paths.at("a.b")->getName(), "UInt32"); + ASSERT_TRUE(typed_paths.contains("a.c")); + ASSERT_EQ(typed_paths.at("a.c")->getName(), "Array(String)"); + ASSERT_TRUE(col_object.getDynamicPaths().empty()); + ASSERT_TRUE(col_object.getSharedDataOffsets().empty()); + ASSERT_TRUE(col_object.getSharedDataPathsAndValues().first->empty()); + ASSERT_TRUE(col_object.getSharedDataPathsAndValues().second->empty()); + ASSERT_EQ(col_object.getMaxDynamicTypes(), 10); + ASSERT_EQ(col_object.getMaxDynamicPaths(), 20); +} + +TEST(ColumnObject, GetName) +{ + auto type = DataTypeFactory::instance().get("JSON(max_dynamic_types=10, max_dynamic_paths=20, b.d UInt32, a.b Array(String))"); + auto col = type->createColumn(); + ASSERT_EQ(col->getName(), "Object(max_dynamic_paths=20, max_dynamic_types=10, a.b Array(String), b.d UInt32)"); +} + +Field deserializeFieldFromSharedData(ColumnString * values, size_t n) +{ + auto data = values->getDataAt(n); + ReadBufferFromMemory buf(data.data, data.size); + Field res; + std::make_shared()->deserializeBinary(res, buf, FormatSettings()); + return res; +} + +TEST(ColumnObject, InsertField) +{ + auto type = DataTypeFactory::instance().get("JSON(max_dynamic_types=10, max_dynamic_paths=2, b.d UInt32, a.b Array(String))"); + auto col = type->createColumn(); + auto & col_object = assert_cast(*col); + const auto & typed_paths = col_object.getTypedPaths(); + const auto & dynamic_paths = col_object.getDynamicPaths(); + const auto & shared_data_nested_column = col_object.getSharedDataNestedColumn(); + const auto & shared_data_offsets = col_object.getSharedDataOffsets(); + const auto [shared_data_paths, shared_data_values] = col_object.getSharedDataPathsAndValues(); + Object empty_object; + col_object.insert(empty_object); + ASSERT_EQ(col_object[0], (Object{{"a.b", Array{}}, {"b.d", Field(0u)}})); + ASSERT_EQ(typed_paths.at("a.b")->size(), 1); + ASSERT_TRUE(typed_paths.at("a.b")->isDefaultAt(0)); + ASSERT_EQ(typed_paths.at("b.d")->size(), 1); + ASSERT_TRUE(typed_paths.at("b.d")->isDefaultAt(0)); + ASSERT_TRUE(dynamic_paths.empty()); + ASSERT_EQ(shared_data_nested_column.size(), 1); + ASSERT_TRUE(shared_data_nested_column.isDefaultAt(0)); + + Object object1 = {{"a.b", Array{String("Hello"), String("World")}}, {"a.c", Field(42)}}; + col_object.insert(object1); + ASSERT_EQ(col_object[1], (Object{{"a.b", Array{String("Hello"), String("World")}}, {"b.d", Field(0u)}, {"a.c", Field(42)}})); + ASSERT_EQ(typed_paths.at("a.b")->size(), 2); + ASSERT_EQ((*typed_paths.at("a.b"))[1], (Array{String("Hello"), String("World")})); + ASSERT_EQ(typed_paths.at("b.d")->size(), 2); + ASSERT_TRUE(typed_paths.at("b.d")->isDefaultAt(1)); + ASSERT_EQ(dynamic_paths.size(), 1); + ASSERT_TRUE(dynamic_paths.contains("a.c")); + ASSERT_EQ(dynamic_paths.at("a.c")->size(), 2); + ASSERT_TRUE(dynamic_paths.at("a.c")->isDefaultAt(0)); + ASSERT_EQ((*dynamic_paths.at("a.c"))[1], Field(42)); + ASSERT_EQ(shared_data_nested_column.size(), 2); + ASSERT_TRUE(shared_data_nested_column.isDefaultAt(1)); + + Object object2 = {{"b.d", Field(142u)}, {"a.c", Field(43)}, {"a.d", Field("str")}, {"a.e", Field(242)}, {"a.f", Array{Field(42), Field(43)}}}; + col_object.insert(object2); + ASSERT_EQ(col_object[2], (Object{{"a.b", Array{}}, {"b.d", Field(142u)}, {"a.c", Field(43)}, {"a.d", Field("str")}, {"a.e", Field(242)}, {"a.f", Array{Field(42), Field(43)}}})); + ASSERT_EQ(typed_paths.at("a.b")->size(), 3); + ASSERT_TRUE(typed_paths.at("a.b")->isDefaultAt(2)); + ASSERT_EQ(typed_paths.at("b.d")->size(), 3); + ASSERT_EQ((*typed_paths.at("b.d"))[2], Field(142u)); + ASSERT_EQ(dynamic_paths.size(), 2); + ASSERT_TRUE(dynamic_paths.contains("a.c")); + ASSERT_EQ(dynamic_paths.at("a.c")->size(), 3); + ASSERT_EQ((*dynamic_paths.at("a.c"))[2], Field(43)); + ASSERT_TRUE(dynamic_paths.contains("a.d")); + ASSERT_EQ(dynamic_paths.at("a.d")->size(), 3); + ASSERT_EQ((*dynamic_paths.at("a.d"))[2], Field("str")); + + ASSERT_EQ(shared_data_nested_column.size(), 3); + ASSERT_EQ(shared_data_offsets[2] - shared_data_offsets[1], 2); + ASSERT_EQ((*shared_data_paths)[0], "a.e"); + ASSERT_EQ(deserializeFieldFromSharedData(shared_data_values, 0), Field(242)); + ASSERT_EQ((*shared_data_paths)[1], "a.f"); + ASSERT_EQ(deserializeFieldFromSharedData(shared_data_values, 1), (Array({Field(42), Field(43)}))); + + Object object3 = {{"b.a", Field("Str")}, {"b.b", Field(2)}, {"b.c", Field(Tuple{Field(42), Field("Str")})}}; + col_object.insert(object3); + ASSERT_EQ(col_object[3], (Object{{"a.b", Array{}}, {"b.d", Field(0u)}, {"b.a", Field("Str")}, {"b.b", Field(2)}, {"b.c", Field(Tuple{Field(42), Field("Str")})}})); + ASSERT_EQ(typed_paths.at("a.b")->size(), 4); + ASSERT_TRUE(typed_paths.at("a.b")->isDefaultAt(3)); + ASSERT_EQ(typed_paths.at("b.d")->size(), 4); + ASSERT_TRUE(typed_paths.at("b.d")->isDefaultAt(3)); + ASSERT_EQ(dynamic_paths.size(), 2); + ASSERT_EQ(dynamic_paths.at("a.c")->size(), 4); + ASSERT_TRUE(dynamic_paths.at("a.c")->isDefaultAt(3)); + ASSERT_EQ(dynamic_paths.at("a.d")->size(), 4); + ASSERT_TRUE(dynamic_paths.at("a.d")->isDefaultAt(3)); + + ASSERT_EQ(shared_data_nested_column.size(), 4); + ASSERT_EQ(shared_data_offsets[3] - shared_data_offsets[2], 3); + ASSERT_EQ((*shared_data_paths)[2], "b.a"); + ASSERT_EQ(deserializeFieldFromSharedData(shared_data_values, 2), Field("Str")); + ASSERT_EQ((*shared_data_paths)[3], "b.b"); + ASSERT_EQ(deserializeFieldFromSharedData(shared_data_values, 3), Field(2)); + ASSERT_EQ((*shared_data_paths)[4], "b.c"); + ASSERT_EQ(deserializeFieldFromSharedData(shared_data_values, 4), Field(Tuple{Field(42), Field("Str")})); + + Object object4 = {{"c.c", Field(Null())}, {"c.d", Field(Null())}}; + col_object.insert(object4); + ASSERT_TRUE(shared_data_nested_column.isDefaultAt(4)); +} + +TEST(ColumnObject, InsertFrom) +{ + auto type = DataTypeFactory::instance().get("JSON(max_dynamic_types=10, max_dynamic_paths=2, b.d UInt32, a.b Array(String))"); + auto col = type->createColumn(); + auto & col_object = assert_cast(*col); + col_object.insert(Object{{"a.a", Field(42)}}); + + const auto & typed_paths = col_object.getTypedPaths(); + const auto & dynamic_paths = col_object.getDynamicPaths(); + const auto & shared_data_nested_column = col_object.getSharedDataNestedColumn(); + const auto & shared_data_offsets = col_object.getSharedDataOffsets(); + const auto [shared_data_paths, shared_data_values] = col_object.getSharedDataPathsAndValues(); + + auto src_col1 = type->createColumn(); + auto & src_col_object1 = assert_cast(*src_col1); + src_col_object1.insert(Object{{"b.d", Field(43u)}, {"a.c", Field("Str1")}}); + col_object.insertFrom(src_col_object1, 0); + ASSERT_EQ((*typed_paths.at("a.b"))[1], Field(Array{})); + ASSERT_EQ((*typed_paths.at("b.d"))[1], Field(43u)); + ASSERT_EQ(dynamic_paths.size(), 2); + ASSERT_EQ((*dynamic_paths.at("a.a"))[1], Field(Null())); + ASSERT_EQ((*dynamic_paths.at("a.c"))[1], Field("Str1")); + ASSERT_TRUE(shared_data_nested_column.isDefaultAt(1)); + + auto src_col2 = type->createColumn(); + auto & src_col_object2 = assert_cast(*src_col2); + src_col_object2.insert(Object{{"a.b", Array{"Str4", "Str5"}}, {"b.d", Field(44u)}, {"a.d", Field("Str2")}, {"a.e", Field("Str3")}}); + col_object.insertFrom(src_col_object2, 0); + ASSERT_EQ((*typed_paths.at("a.b"))[2], Field(Array{"Str4", "Str5"})); + ASSERT_EQ((*typed_paths.at("b.d"))[2], Field(44u)); + ASSERT_EQ(dynamic_paths.size(), 2); + ASSERT_EQ((*dynamic_paths.at("a.a"))[2], Field(Null())); + ASSERT_EQ((*dynamic_paths.at("a.c"))[2], Field(Null())); + ASSERT_EQ(shared_data_offsets[2] - shared_data_offsets[1], 2); + ASSERT_EQ((*shared_data_paths)[0], "a.d"); + ASSERT_EQ(deserializeFieldFromSharedData(shared_data_values, 0), Field("Str2")); + ASSERT_EQ((*shared_data_paths)[1], "a.e"); + ASSERT_EQ(deserializeFieldFromSharedData(shared_data_values, 1), Field("Str3")); + + auto src_col3 = type->createColumn(); + auto & src_col_object3 = assert_cast(*src_col3); + src_col_object3.insert(Object{{"a.h", Field("Str6")}, {"h.h", Field("Str7")}}); + src_col_object3.insert(Object{{"a.a", Field("Str10")}, {"a.c", Field(45u)}, {"a.h", Field("Str6")}, {"h.h", Field("Str7")}, {"a.f", Field("Str8")}, {"a.g", Field("Str9")}, {"a.i", Field("Str11")}, {"a.u", Field(Null())}}); + col_object.insertFrom(src_col_object3, 1); + ASSERT_EQ((*typed_paths.at("a.b"))[3], Field(Array{})); + ASSERT_EQ((*typed_paths.at("b.d"))[3], Field(0u)); + ASSERT_EQ(dynamic_paths.size(), 2); + ASSERT_EQ((*dynamic_paths.at("a.a"))[3], Field("Str10")); + ASSERT_EQ((*dynamic_paths.at("a.c"))[3], Field(45u)); + ASSERT_EQ(shared_data_offsets[3] - shared_data_offsets[2], 5); + ASSERT_EQ((*shared_data_paths)[2], "a.f"); + ASSERT_EQ(deserializeFieldFromSharedData(shared_data_values, 2), Field("Str8")); + ASSERT_EQ((*shared_data_paths)[3], "a.g"); + ASSERT_EQ(deserializeFieldFromSharedData(shared_data_values, 3), Field("Str9")); + ASSERT_EQ((*shared_data_paths)[4], "a.h"); + ASSERT_EQ(deserializeFieldFromSharedData(shared_data_values, 4), Field("Str6")); + ASSERT_EQ((*shared_data_paths)[5], "a.i"); + ASSERT_EQ(deserializeFieldFromSharedData(shared_data_values, 5), Field("Str11")); + ASSERT_EQ((*shared_data_paths)[6], "h.h"); + ASSERT_EQ(deserializeFieldFromSharedData(shared_data_values, 6), Field("Str7")); +} + + +TEST(ColumnObject, InsertRangeFrom) +{ + auto type = DataTypeFactory::instance().get("JSON(max_dynamic_types=10, max_dynamic_paths=2, b.d UInt32, a.b Array(String))"); + auto col = type->createColumn(); + auto & col_object = assert_cast(*col); + col_object.insert(Object{{"a.a", Field(42)}}); + + const auto & typed_paths = col_object.getTypedPaths(); + const auto & dynamic_paths = col_object.getDynamicPaths(); + const auto & shared_data_nested_column = col_object.getSharedDataNestedColumn(); + const auto & shared_data_offsets = col_object.getSharedDataOffsets(); + const auto [shared_data_paths, shared_data_values] = col_object.getSharedDataPathsAndValues(); + + auto src_col1 = type->createColumn(); + auto & src_col_object1 = assert_cast(*src_col1); + src_col_object1.insert(Object{{"b.d", Field(43u)}, {"a.c", Field("Str1")}}); + src_col_object1.insert(Object{{"a.b", Field(Array{"Str1", "Str2"})}, {"a.a", Field("Str1")}}); + src_col_object1.insert(Object{{"b.d", Field(45u)}, {"a.c", Field("Str2")}}); + col_object.insertRangeFrom(src_col_object1, 0, 3); + ASSERT_EQ((*typed_paths.at("a.b"))[1], Field(Array{})); + ASSERT_EQ((*typed_paths.at("a.b"))[2], Field(Array{"Str1", "Str2"})); + ASSERT_EQ((*typed_paths.at("a.b"))[3], Field(Array{})); + ASSERT_EQ((*typed_paths.at("b.d"))[1], Field(43u)); + ASSERT_EQ((*typed_paths.at("b.d"))[2], Field(0u)); + ASSERT_EQ((*typed_paths.at("b.d"))[3], Field(45u)); + ASSERT_EQ(dynamic_paths.size(), 2); + ASSERT_EQ((*dynamic_paths.at("a.a"))[1], Field(Null())); + ASSERT_EQ((*dynamic_paths.at("a.a"))[2], Field("Str1")); + ASSERT_EQ((*dynamic_paths.at("a.a"))[3], Field(Null())); + ASSERT_EQ((*dynamic_paths.at("a.c"))[1], Field("Str1")); + ASSERT_EQ((*dynamic_paths.at("a.c"))[2], Field(Null())); + ASSERT_EQ((*dynamic_paths.at("a.c"))[3], Field("Str2")); + ASSERT_TRUE(shared_data_nested_column.isDefaultAt(1)); + ASSERT_TRUE(shared_data_nested_column.isDefaultAt(2)); + ASSERT_TRUE(shared_data_nested_column.isDefaultAt(3)); + + auto src_col2 = type->createColumn(); + auto & src_col_object2 = assert_cast(*src_col2); + src_col_object2.insert(Object{{"a.b", Array{"Str4", "Str5"}}, {"a.d", Field("Str2")}, {"a.e", Field("Str3")}}); + src_col_object2.insert(Object{{"b.d", Field(44u)}, {"a.d", Field("Str22")}, {"a.e", Field("Str33")}}); + src_col_object2.insert(Object{{"a.b", Array{"Str44", "Str55"}}, {"a.d", Field("Str222")}, {"a.e", Field("Str333")}}); + col_object.insertRangeFrom(src_col_object2, 0, 3); + ASSERT_EQ((*typed_paths.at("a.b"))[4], Field(Array{"Str4", "Str5"})); + ASSERT_EQ((*typed_paths.at("a.b"))[5], Field(Array{})); + ASSERT_EQ((*typed_paths.at("a.b"))[6], Field(Array{"Str44", "Str55"})); + ASSERT_EQ((*typed_paths.at("b.d"))[4], Field(0u)); + ASSERT_EQ((*typed_paths.at("b.d"))[5], Field(44u)); + ASSERT_EQ((*typed_paths.at("b.d"))[6], Field(0u)); + ASSERT_EQ(dynamic_paths.size(), 2); + ASSERT_EQ((*dynamic_paths.at("a.a"))[4], Field(Null())); + ASSERT_EQ((*dynamic_paths.at("a.a"))[5], Field(Null())); + ASSERT_EQ((*dynamic_paths.at("a.a"))[6], Field(Null())); + ASSERT_EQ((*dynamic_paths.at("a.c"))[4], Field(Null())); + ASSERT_EQ((*dynamic_paths.at("a.c"))[5], Field(Null())); + ASSERT_EQ((*dynamic_paths.at("a.c"))[6], Field(Null())); + ASSERT_EQ(shared_data_offsets[4] - shared_data_offsets[3], 2); + ASSERT_EQ((*shared_data_paths)[0], "a.d"); + ASSERT_EQ(deserializeFieldFromSharedData(shared_data_values, 0), Field("Str2")); + ASSERT_EQ((*shared_data_paths)[1], "a.e"); + ASSERT_EQ(deserializeFieldFromSharedData(shared_data_values, 1), Field("Str3")); + ASSERT_EQ(shared_data_offsets[5] - shared_data_offsets[4], 2); + ASSERT_EQ((*shared_data_paths)[2], "a.d"); + ASSERT_EQ(deserializeFieldFromSharedData(shared_data_values, 2), Field("Str22")); + ASSERT_EQ((*shared_data_paths)[3], "a.e"); + ASSERT_EQ(deserializeFieldFromSharedData(shared_data_values, 3), Field("Str33")); + ASSERT_EQ(shared_data_offsets[6] - shared_data_offsets[5], 2); + ASSERT_EQ((*shared_data_paths)[4], "a.d"); + ASSERT_EQ(deserializeFieldFromSharedData(shared_data_values, 4), Field("Str222")); + ASSERT_EQ((*shared_data_paths)[5], "a.e"); + ASSERT_EQ(deserializeFieldFromSharedData(shared_data_values, 5), Field("Str333")); + + auto src_col3 = type->createColumn(); + auto & src_col_object3 = assert_cast(*src_col3); + src_col_object3.insert(Object{{"a.h", Field("Str6")}, {"h.h", Field("Str7")}}); + src_col_object3.insert(Object{{"a.h", Field("Str6")}, {"h.h", Field("Str7")}, {"a.f", Field("Str8")}, {"a.g", Field("Str9")}, {"a.i", Field("Str11")}}); + src_col_object3.insert(Object{{"a.a", Field("Str10")}}); + src_col_object3.insert(Object{{"a.h", Field("Str6")}, {"a.c", Field(45u)}, {"h.h", Field("Str7")}, {"a.i", Field("Str11")}}); + col_object.insertRangeFrom(src_col_object3, 1, 3); + ASSERT_EQ((*typed_paths.at("a.b"))[7], Field(Array{})); + ASSERT_EQ((*typed_paths.at("a.b"))[8], Field(Array{})); + ASSERT_EQ((*typed_paths.at("a.b"))[9], Field(Array{})); + ASSERT_EQ((*typed_paths.at("b.d"))[7], Field(0u)); + ASSERT_EQ((*typed_paths.at("b.d"))[8], Field(0u)); + ASSERT_EQ((*typed_paths.at("b.d"))[9], Field(0u)); + ASSERT_EQ(dynamic_paths.size(), 2); + ASSERT_EQ((*dynamic_paths.at("a.a"))[7], Field(Null())); + ASSERT_EQ((*dynamic_paths.at("a.a"))[8], Field("Str10")); + ASSERT_EQ((*dynamic_paths.at("a.a"))[9], Field(Null())); + ASSERT_EQ((*dynamic_paths.at("a.c"))[7], Field(Null())); + ASSERT_EQ((*dynamic_paths.at("a.c"))[8], Field(Null())); + ASSERT_EQ((*dynamic_paths.at("a.c"))[9], Field(45u)); + ASSERT_EQ(shared_data_offsets[7] - shared_data_offsets[6], 5); + ASSERT_EQ((*shared_data_paths)[6], "a.f"); + ASSERT_EQ(deserializeFieldFromSharedData(shared_data_values, 6), Field("Str8")); + ASSERT_EQ((*shared_data_paths)[7], "a.g"); + ASSERT_EQ(deserializeFieldFromSharedData(shared_data_values, 7), Field("Str9")); + ASSERT_EQ((*shared_data_paths)[8], "a.h"); + ASSERT_EQ(deserializeFieldFromSharedData(shared_data_values, 8), Field("Str6")); + ASSERT_EQ((*shared_data_paths)[9], "a.i"); + ASSERT_EQ(deserializeFieldFromSharedData(shared_data_values, 9), Field("Str11")); + ASSERT_EQ((*shared_data_paths)[10], "h.h"); + ASSERT_EQ(deserializeFieldFromSharedData(shared_data_values, 10), Field("Str7")); + ASSERT_EQ(shared_data_offsets[8] - shared_data_offsets[7], 0); + ASSERT_EQ(shared_data_offsets[9] - shared_data_offsets[8], 3); + ASSERT_EQ((*shared_data_paths)[11], "a.h"); + ASSERT_EQ(deserializeFieldFromSharedData(shared_data_values, 11), Field("Str6")); + ASSERT_EQ((*shared_data_paths)[12], "a.i"); + ASSERT_EQ(deserializeFieldFromSharedData(shared_data_values, 12), Field("Str11")); +} + +TEST(ColumnObject, SerializeDeserializerFromArena) +{ + auto type = DataTypeFactory::instance().get("JSON(max_dynamic_types=10, max_dynamic_paths=2, b.d UInt32, a.b Array(String))"); + auto col = type->createColumn(); + auto & col_object = assert_cast(*col); + col_object.insert(Object{{"b.d", Field(42u)}, {"a.b", Array{"Str1", "Str2"}}, {"a.a", Tuple{"Str3", 441u}}, {"a.c", Field("Str4")}, {"a.d", Array{Field(45), Field(46)}}, {"a.e", Field(47)}}); + col_object.insert(Object{{"b.a", Field(48)}, {"b.b", Array{Field(49), Field(50)}}}); + col_object.insert(Object{{"b.d", Field(442u)}, {"a.b", Array{"Str11", "Str22"}}, {"a.a", Tuple{"Str33", 444u}}, {"a.c", Field("Str44")}, {"a.d", Array{Field(445), Field(446)}}, {"a.e", Field(447)}}); + + Arena arena; + const char * pos = nullptr; + auto ref1 = col_object.serializeValueIntoArena(0, arena, pos); + col_object.serializeValueIntoArena(1, arena, pos); + col_object.serializeValueIntoArena(2, arena, pos); + + auto col2 = type->createColumn(); + auto & col_object2 = assert_cast(*col); + pos = col_object2.deserializeAndInsertFromArena(ref1.data); + pos = col_object2.deserializeAndInsertFromArena(pos); + col_object2.deserializeAndInsertFromArena(pos); + + ASSERT_EQ(col_object2[0], (Object{{"b.d", Field(42u)}, {"a.b", Array{"Str1", "Str2"}}, {"a.a", Tuple{"Str3", 441u}}, {"a.c", Field("Str4")}, {"a.d", Array{Field(45), Field(46)}}, {"a.e", Field(47)}})); + ASSERT_EQ(col_object2[1], (Object{{"b.d", Field{0u}}, {"a.b", Array{}}, {"b.a", Field(48)}, {"b.b", Array{Field(49), Field(50)}}})); + ASSERT_EQ(col_object2[2], (Object{{"b.d", Field(442u)}, {"a.b", Array{"Str11", "Str22"}}, {"a.a", Tuple{"Str33", 444u}}, {"a.c", Field("Str44")}, {"a.d", Array{Field(445), Field(446)}}, {"a.e", Field(447)}})); +} + +TEST(ColumnObject, SkipSerializedInArena) +{ + auto type = DataTypeFactory::instance().get("JSON(max_dynamic_types=10, max_dynamic_paths=2, b.d UInt32, a.b Array(String))"); + auto col = type->createColumn(); + auto & col_object = assert_cast(*col); + col_object.insert(Object{{"b.d", Field(42u)}, {"a.b", Array{"Str1", "Str2"}}, {"a.a", Tuple{"Str3", 441u}}, {"a.c", Field("Str4")}, {"a.d", Array{Field(45), Field(46)}}, {"a.e", Field(47)}}); + col_object.insert(Object{{"b.a", Field(48)}, {"b.b", Array{Field(49), Field(50)}}}); + col_object.insert(Object{{"b.d", Field(442u)}, {"a.b", Array{"Str11", "Str22"}}, {"a.a", Tuple{"Str33", 444u}}, {"a.c", Field("Str44")}, {"a.d", Array{Field(445), Field(446)}}, {"a.e", Field(447)}}); + + Arena arena; + const char * pos = nullptr; + auto ref1 = col_object.serializeValueIntoArena(0, arena, pos); + col_object.serializeValueIntoArena(1, arena, pos); + auto ref3 = col_object.serializeValueIntoArena(2, arena, pos); + + const char * end = ref3.data + ref3.size; + auto col2 = type->createColumn(); + pos = col2->skipSerializedInArena(ref1.data); + pos = col2->skipSerializedInArena(pos); + pos = col2->skipSerializedInArena(pos); + ASSERT_EQ(pos, end); +} diff --git a/src/Columns/tests/gtest_column_variant.cpp b/src/Columns/tests/gtest_column_variant.cpp index 25f276b9600..5e481b88409 100644 --- a/src/Columns/tests/gtest_column_variant.cpp +++ b/src/Columns/tests/gtest_column_variant.cpp @@ -108,10 +108,10 @@ void checkColumnVariant1(ColumnVariant * column) ASSERT_EQ(offsets[1], 0); ASSERT_EQ(offsets[3], 1); ASSERT_TRUE(column->isDefaultAt(2) && column->isDefaultAt(4)); - ASSERT_EQ((*column)[0].get(), 42); - ASSERT_EQ((*column)[1].get(), "Hello"); + ASSERT_EQ((*column)[0].safeGet(), 42); + ASSERT_EQ((*column)[1].safeGet(), "Hello"); ASSERT_TRUE((*column)[2].isNull()); - ASSERT_EQ((*column)[3].get(), "World"); + ASSERT_EQ((*column)[3].safeGet(), "World"); ASSERT_TRUE((*column)[4].isNull()); } @@ -209,9 +209,9 @@ TEST(ColumnVariant, CreateFromDiscriminatorsAndOneFullColumnNoNulls) ASSERT_EQ(offsets[0], 0); ASSERT_EQ(offsets[1], 1); ASSERT_EQ(offsets[2], 2); - ASSERT_EQ((*column)[0].get(), 0); - ASSERT_EQ((*column)[1].get(), 1); - ASSERT_EQ((*column)[2].get(), 2); + ASSERT_EQ((*column)[0].safeGet(), 0); + ASSERT_EQ((*column)[1].safeGet(), 1); + ASSERT_EQ((*column)[2].safeGet(), 2); } TEST(ColumnVariant, CreateFromDiscriminatorsAndOneFullColumnNoNullsWithLocalOrder) @@ -222,9 +222,9 @@ TEST(ColumnVariant, CreateFromDiscriminatorsAndOneFullColumnNoNullsWithLocalOrde ASSERT_EQ(offsets[0], 0); ASSERT_EQ(offsets[1], 1); ASSERT_EQ(offsets[2], 2); - ASSERT_EQ((*column)[0].get(), 0); - ASSERT_EQ((*column)[1].get(), 1); - ASSERT_EQ((*column)[2].get(), 2); + ASSERT_EQ((*column)[0].safeGet(), 0); + ASSERT_EQ((*column)[1].safeGet(), 1); + ASSERT_EQ((*column)[2].safeGet(), 2); ASSERT_EQ(column->localDiscriminatorAt(0), 2); ASSERT_EQ(column->localDiscriminatorAt(1), 2); ASSERT_EQ(column->localDiscriminatorAt(2), 2); @@ -331,9 +331,9 @@ TEST(ColumnVariant, CloneResizedGeneral1) ASSERT_EQ(offsets[0], 0); ASSERT_EQ(offsets[1], 0); ASSERT_EQ(offsets[3], 1); - ASSERT_EQ((*resized_column_variant)[0].get(), 42); - ASSERT_EQ((*resized_column_variant)[1].get(), "Hello"); - ASSERT_EQ((*resized_column_variant)[3].get(), 43); + ASSERT_EQ((*resized_column_variant)[0].safeGet(), 42); + ASSERT_EQ((*resized_column_variant)[1].safeGet(), "Hello"); + ASSERT_EQ((*resized_column_variant)[3].safeGet(), 43); } TEST(ColumnVariant, CloneResizedGeneral2) @@ -367,7 +367,7 @@ TEST(ColumnVariant, CloneResizedGeneral2) ASSERT_EQ(discriminators[2], ColumnVariant::NULL_DISCRIMINATOR); const auto & offsets = resized_column_variant->getOffsets(); ASSERT_EQ(offsets[0], 0); - ASSERT_EQ((*resized_column_variant)[0].get(), 42); + ASSERT_EQ((*resized_column_variant)[0].safeGet(), 42); } TEST(ColumnVariant, CloneResizedGeneral3) @@ -405,10 +405,10 @@ TEST(ColumnVariant, CloneResizedGeneral3) ASSERT_EQ(offsets[1], 0); ASSERT_EQ(offsets[2], 1); ASSERT_EQ(offsets[3], 1); - ASSERT_EQ((*resized_column_variant)[0].get(), 42); - ASSERT_EQ((*resized_column_variant)[1].get(), "Hello"); - ASSERT_EQ((*resized_column_variant)[2].get(), "World"); - ASSERT_EQ((*resized_column_variant)[3].get(), 43); + ASSERT_EQ((*resized_column_variant)[0].safeGet(), 42); + ASSERT_EQ((*resized_column_variant)[1].safeGet(), "Hello"); + ASSERT_EQ((*resized_column_variant)[2].safeGet(), "World"); + ASSERT_EQ((*resized_column_variant)[3].safeGet(), 43); } MutableColumnPtr createDiscriminators2() @@ -465,7 +465,7 @@ TEST(ColumnVariant, InsertFrom) auto column_from = createVariantColumn2(change_order); column_to->insertFrom(*column_from, 3); ASSERT_EQ(column_to->globalDiscriminatorAt(5), 0); - ASSERT_EQ((*column_to)[5].get(), 43); + ASSERT_EQ((*column_to)[5].safeGet(), 43); } } @@ -478,8 +478,8 @@ TEST(ColumnVariant, InsertRangeFromOneColumnNoNulls) column_to->insertRangeFrom(*column_from, 2, 2); ASSERT_EQ(column_to->globalDiscriminatorAt(7), 0); ASSERT_EQ(column_to->globalDiscriminatorAt(8), 0); - ASSERT_EQ((*column_to)[7].get(), 2); - ASSERT_EQ((*column_to)[8].get(), 3); + ASSERT_EQ((*column_to)[7].safeGet(), 2); + ASSERT_EQ((*column_to)[8].safeGet(), 3); } } @@ -494,9 +494,9 @@ TEST(ColumnVariant, InsertRangeFromGeneral) ASSERT_EQ(column_to->globalDiscriminatorAt(6), ColumnVariant::NULL_DISCRIMINATOR); ASSERT_EQ(column_to->globalDiscriminatorAt(7), 0); ASSERT_EQ(column_to->globalDiscriminatorAt(8), 1); - ASSERT_EQ((*column_to)[5].get(), "Hello"); - ASSERT_EQ((*column_to)[7].get(), 43); - ASSERT_EQ((*column_to)[8].get(), "World"); + ASSERT_EQ((*column_to)[5].safeGet(), "Hello"); + ASSERT_EQ((*column_to)[7].safeGet(), 43); + ASSERT_EQ((*column_to)[8].safeGet(), "World"); } } @@ -509,8 +509,8 @@ TEST(ColumnVariant, InsertManyFrom) column_to->insertManyFrom(*column_from, 3, 2); ASSERT_EQ(column_to->globalDiscriminatorAt(5), 0); ASSERT_EQ(column_to->globalDiscriminatorAt(6), 0); - ASSERT_EQ((*column_to)[5].get(), 43); - ASSERT_EQ((*column_to)[6].get(), 43); + ASSERT_EQ((*column_to)[5].safeGet(), 43); + ASSERT_EQ((*column_to)[6].safeGet(), 43); } } @@ -520,8 +520,8 @@ TEST(ColumnVariant, PopBackOneColumnNoNulls) column->popBack(3); ASSERT_EQ(column->size(), 2); ASSERT_EQ(column->getVariantByLocalDiscriminator(0).size(), 2); - ASSERT_EQ((*column)[0].get(), 0); - ASSERT_EQ((*column)[1].get(), 1); + ASSERT_EQ((*column)[0].safeGet(), 0); + ASSERT_EQ((*column)[1].safeGet(), 1); } TEST(ColumnVariant, PopBackGeneral) @@ -531,8 +531,8 @@ TEST(ColumnVariant, PopBackGeneral) ASSERT_EQ(column->size(), 3); ASSERT_EQ(column->getVariantByLocalDiscriminator(0).size(), 1); ASSERT_EQ(column->getVariantByLocalDiscriminator(1).size(), 1); - ASSERT_EQ((*column)[0].get(), 42); - ASSERT_EQ((*column)[1].get(), "Hello"); + ASSERT_EQ((*column)[0].safeGet(), 42); + ASSERT_EQ((*column)[1].safeGet(), "Hello"); ASSERT_TRUE((*column)[2].isNull()); } @@ -545,8 +545,8 @@ TEST(ColumnVariant, FilterOneColumnNoNulls) filter.push_back(1); auto filtered_column = column->filter(filter, -1); ASSERT_EQ(filtered_column->size(), 2); - ASSERT_EQ((*filtered_column)[0].get(), 0); - ASSERT_EQ((*filtered_column)[1].get(), 2); + ASSERT_EQ((*filtered_column)[0].safeGet(), 0); + ASSERT_EQ((*filtered_column)[1].safeGet(), 2); } TEST(ColumnVariant, FilterGeneral) @@ -562,7 +562,7 @@ TEST(ColumnVariant, FilterGeneral) filter.push_back(0); auto filtered_column = column->filter(filter, -1); ASSERT_EQ(filtered_column->size(), 3); - ASSERT_EQ((*filtered_column)[0].get(), "Hello"); + ASSERT_EQ((*filtered_column)[0].safeGet(), "Hello"); ASSERT_TRUE((*filtered_column)[1].isNull()); ASSERT_TRUE((*filtered_column)[2].isNull()); } @@ -577,9 +577,9 @@ TEST(ColumnVariant, PermuteAndIndexOneColumnNoNulls) permutation.push_back(0); auto permuted_column = column->permute(permutation, 3); ASSERT_EQ(permuted_column->size(), 3); - ASSERT_EQ((*permuted_column)[0].get(), 1); - ASSERT_EQ((*permuted_column)[1].get(), 3); - ASSERT_EQ((*permuted_column)[2].get(), 2); + ASSERT_EQ((*permuted_column)[0].safeGet(), 1); + ASSERT_EQ((*permuted_column)[1].safeGet(), 3); + ASSERT_EQ((*permuted_column)[2].safeGet(), 2); auto index = ColumnUInt64::create(); index->getData().push_back(1); @@ -588,9 +588,9 @@ TEST(ColumnVariant, PermuteAndIndexOneColumnNoNulls) index->getData().push_back(0); auto indexed_column = column->index(*index, 3); ASSERT_EQ(indexed_column->size(), 3); - ASSERT_EQ((*indexed_column)[0].get(), 1); - ASSERT_EQ((*indexed_column)[1].get(), 3); - ASSERT_EQ((*indexed_column)[2].get(), 2); + ASSERT_EQ((*indexed_column)[0].safeGet(), 1); + ASSERT_EQ((*indexed_column)[1].safeGet(), 3); + ASSERT_EQ((*indexed_column)[2].safeGet(), 2); } TEST(ColumnVariant, PermuteGeneral) @@ -603,9 +603,9 @@ TEST(ColumnVariant, PermuteGeneral) permutation.push_back(5); auto permuted_column = column->permute(permutation, 4); ASSERT_EQ(permuted_column->size(), 4); - ASSERT_EQ((*permuted_column)[0].get(), 43); - ASSERT_EQ((*permuted_column)[1].get(), "World"); - ASSERT_EQ((*permuted_column)[2].get(), "Hello"); + ASSERT_EQ((*permuted_column)[0].safeGet(), 43); + ASSERT_EQ((*permuted_column)[1].safeGet(), "World"); + ASSERT_EQ((*permuted_column)[2].safeGet(), "Hello"); ASSERT_TRUE((*permuted_column)[3].isNull()); } @@ -618,12 +618,12 @@ TEST(ColumnVariant, ReplicateOneColumnNoNull) offsets.push_back(6); auto replicated_column = column->replicate(offsets); ASSERT_EQ(replicated_column->size(), 6); - ASSERT_EQ((*replicated_column)[0].get(), 1); - ASSERT_EQ((*replicated_column)[1].get(), 1); - ASSERT_EQ((*replicated_column)[2].get(), 1); - ASSERT_EQ((*replicated_column)[3].get(), 2); - ASSERT_EQ((*replicated_column)[4].get(), 2); - ASSERT_EQ((*replicated_column)[5].get(), 2); + ASSERT_EQ((*replicated_column)[0].safeGet(), 1); + ASSERT_EQ((*replicated_column)[1].safeGet(), 1); + ASSERT_EQ((*replicated_column)[2].safeGet(), 1); + ASSERT_EQ((*replicated_column)[3].safeGet(), 2); + ASSERT_EQ((*replicated_column)[4].safeGet(), 2); + ASSERT_EQ((*replicated_column)[5].safeGet(), 2); } TEST(ColumnVariant, ReplicateGeneral) @@ -637,9 +637,9 @@ TEST(ColumnVariant, ReplicateGeneral) offsets.push_back(7); auto replicated_column = column->replicate(offsets); ASSERT_EQ(replicated_column->size(), 7); - ASSERT_EQ((*replicated_column)[0].get(), 42); - ASSERT_EQ((*replicated_column)[1].get(), "Hello"); - ASSERT_EQ((*replicated_column)[2].get(), "Hello"); + ASSERT_EQ((*replicated_column)[0].safeGet(), 42); + ASSERT_EQ((*replicated_column)[1].safeGet(), "Hello"); + ASSERT_EQ((*replicated_column)[2].safeGet(), "Hello"); ASSERT_TRUE((*replicated_column)[3].isNull()); ASSERT_TRUE((*replicated_column)[4].isNull()); ASSERT_TRUE((*replicated_column)[5].isNull()); @@ -657,13 +657,13 @@ TEST(ColumnVariant, ScatterOneColumnNoNulls) selector.push_back(1); auto columns = column->scatter(3, selector); ASSERT_EQ(columns[0]->size(), 2); - ASSERT_EQ((*columns[0])[0].get(), 0); - ASSERT_EQ((*columns[0])[1].get(), 3); + ASSERT_EQ((*columns[0])[0].safeGet(), 0); + ASSERT_EQ((*columns[0])[1].safeGet(), 3); ASSERT_EQ(columns[1]->size(), 2); - ASSERT_EQ((*columns[1])[0].get(), 1); - ASSERT_EQ((*columns[1])[1].get(), 4); + ASSERT_EQ((*columns[1])[0].safeGet(), 1); + ASSERT_EQ((*columns[1])[1].safeGet(), 4); ASSERT_EQ(columns[2]->size(), 1); - ASSERT_EQ((*columns[2])[0].get(), 2); + ASSERT_EQ((*columns[2])[0].safeGet(), 2); } TEST(ColumnVariant, ScatterGeneral) @@ -680,12 +680,12 @@ TEST(ColumnVariant, ScatterGeneral) auto columns = column->scatter(3, selector); ASSERT_EQ(columns[0]->size(), 3); - ASSERT_EQ((*columns[0])[0].get(), 42); - ASSERT_EQ((*columns[0])[1].get(), "Hello"); - ASSERT_EQ((*columns[0])[2].get(), 43); + ASSERT_EQ((*columns[0])[0].safeGet(), 42); + ASSERT_EQ((*columns[0])[1].safeGet(), "Hello"); + ASSERT_EQ((*columns[0])[2].safeGet(), 43); ASSERT_EQ(columns[1]->size(), 2); - ASSERT_EQ((*columns[1])[0].get(), "World"); - ASSERT_EQ((*columns[1])[1].get(), 44); + ASSERT_EQ((*columns[1])[0].safeGet(), "World"); + ASSERT_EQ((*columns[1])[1].safeGet(), 44); ASSERT_EQ(columns[2]->size(), 2); ASSERT_TRUE((*columns[2])[0].isNull()); ASSERT_TRUE((*columns[2])[1].isNull()); diff --git a/src/Columns/tests/gtest_low_cardinality.cpp b/src/Columns/tests/gtest_low_cardinality.cpp index 5e01279b7df..ce16d2cadb1 100644 --- a/src/Columns/tests/gtest_low_cardinality.cpp +++ b/src/Columns/tests/gtest_low_cardinality.cpp @@ -20,13 +20,13 @@ void testLowCardinalityNumberInsert(const DataTypePtr & data_type) Field value; column->get(0, value); - ASSERT_EQ(value.get(), 15); + ASSERT_EQ(value.safeGet(), 15); column->get(1, value); - ASSERT_EQ(value.get(), 20); + ASSERT_EQ(value.safeGet(), 20); column->get(2, value); - ASSERT_EQ(value.get(), 25); + ASSERT_EQ(value.safeGet(), 25); } TEST(ColumnLowCardinality, Insert) diff --git a/src/Common/AsynchronousMetrics.cpp b/src/Common/AsynchronousMetrics.cpp index 56e7c4f3405..9b6a7428411 100644 --- a/src/Common/AsynchronousMetrics.cpp +++ b/src/Common/AsynchronousMetrics.cpp @@ -1,18 +1,24 @@ -#include #include -#include -#include -#include -#include -#include -#include + #include #include +#include +#include #include #include #include #include +#include +#include +#include +#include +#include +#include + +#include + #include +#include #include "config.h" @@ -52,6 +58,12 @@ static std::unique_ptr openFileIfExists(const std::stri return {}; } +static void openCgroupv2MetricFile(const std::string & filename, std::optional & out) +{ + if (auto path = getCgroupsV2PathContainingFile(filename)) + openFileIfExists((path.value() + filename).c_str(), out); +}; + #endif @@ -63,21 +75,15 @@ AsynchronousMetrics::AsynchronousMetrics( , protocol_server_metrics_func(protocol_server_metrics_func_) { #if defined(OS_LINUX) - openFileIfExists("/proc/meminfo", meminfo); - openFileIfExists("/proc/loadavg", loadavg); - openFileIfExists("/proc/stat", proc_stat); openFileIfExists("/proc/cpuinfo", cpuinfo); openFileIfExists("/proc/sys/fs/file-nr", file_nr); - openFileIfExists("/proc/uptime", uptime); openFileIfExists("/proc/net/dev", net_dev); /// CGroups v2 - openFileIfExists("/sys/fs/cgroup/memory.max", cgroupmem_limit_in_bytes); - if (cgroupmem_limit_in_bytes) - { - openFileIfExists("/sys/fs/cgroup/memory.current", cgroupmem_usage_in_bytes); - } - openFileIfExists("/sys/fs/cgroup/cpu.max", cgroupcpu_max); + openCgroupv2MetricFile("memory.max", cgroupmem_limit_in_bytes); + openCgroupv2MetricFile("memory.current", cgroupmem_usage_in_bytes); + openCgroupv2MetricFile("cpu.max", cgroupcpu_max); + openCgroupv2MetricFile("cpu.stat", cgroupcpu_stat); /// CGroups v1 if (!cgroupmem_limit_in_bytes) @@ -90,6 +96,21 @@ AsynchronousMetrics::AsynchronousMetrics( openFileIfExists("/sys/fs/cgroup/cpu/cpu.cfs_period_us", cgroupcpu_cfs_period); openFileIfExists("/sys/fs/cgroup/cpu/cpu.cfs_quota_us", cgroupcpu_cfs_quota); } + if (!cgroupcpu_stat) + openFileIfExists("/sys/fs/cgroup/cpuacct/cpuacct.stat", cgroupcpuacct_stat); + + if (!cgroupcpu_stat && !cgroupcpuacct_stat) + { + /// The following metrics are not cgroup-aware and we've found cgroup-specific metric files for the similar metrics, + /// so we're better not reporting them at all to avoid confusion + openFileIfExists("/proc/loadavg", loadavg); + openFileIfExists("/proc/stat", proc_stat); + openFileIfExists("/proc/uptime", uptime); + } + + /// The same story for memory metrics + if (!cgroupmem_limit_in_bytes) + openFileIfExists("/proc/meminfo", meminfo); openFileIfExists("/proc/sys/vm/max_map_count", vm_max_map_count); openFileIfExists("/proc/self/maps", vm_maps); @@ -570,6 +591,151 @@ AsynchronousMetrics::NetworkInterfaceStatValues::operator-(const AsynchronousMet #endif +#if defined(OS_LINUX) +void AsynchronousMetrics::applyCPUMetricsUpdate( + AsynchronousMetricValues & new_values, const std::string & cpu_suffix, const ProcStatValuesCPU & delta_values, double multiplier) +{ + new_values["OSUserTime" + cpu_suffix] + = {delta_values.user * multiplier, + "The ratio of time the CPU core was running userspace code. This is a system-wide metric, it includes all the processes on the " + "host machine, not just clickhouse-server." + " This includes also the time when the CPU was under-utilized due to the reasons internal to the CPU (memory loads, pipeline " + "stalls, branch mispredictions, running another SMT core)." + " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across " + "them [0..num cores]."}; + new_values["OSNiceTime" + cpu_suffix] + = {delta_values.nice * multiplier, + "The ratio of time the CPU core was running userspace code with higher priority. This is a system-wide metric, it includes all " + "the processes on the host machine, not just clickhouse-server." + " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across " + "them [0..num cores]."}; + new_values["OSSystemTime" + cpu_suffix] + = {delta_values.system * multiplier, + "The ratio of time the CPU core was running OS kernel (system) code. This is a system-wide metric, it includes all the " + "processes on the host machine, not just clickhouse-server." + " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across " + "them [0..num cores]."}; + new_values["OSIdleTime" + cpu_suffix] + = {delta_values.idle * multiplier, + "The ratio of time the CPU core was idle (not even ready to run a process waiting for IO) from the OS kernel standpoint. This " + "is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." + " This does not include the time when the CPU was under-utilized due to the reasons internal to the CPU (memory loads, pipeline " + "stalls, branch mispredictions, running another SMT core)." + " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across " + "them [0..num cores]."}; + new_values["OSIOWaitTime" + cpu_suffix] + = {delta_values.iowait * multiplier, + "The ratio of time the CPU core was not running the code but when the OS kernel did not run any other process on this CPU as " + "the processes were waiting for IO. This is a system-wide metric, it includes all the processes on the host machine, not just " + "clickhouse-server." + " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across " + "them [0..num cores]."}; + new_values["OSIrqTime" + cpu_suffix] + = {delta_values.irq * multiplier, + "The ratio of time spent for running hardware interrupt requests on the CPU. This is a system-wide metric, it includes all the " + "processes on the host machine, not just clickhouse-server." + " A high number of this metric may indicate hardware misconfiguration or a very high network load." + " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across " + "them [0..num cores]."}; + new_values["OSSoftIrqTime" + cpu_suffix] + = {delta_values.softirq * multiplier, + "The ratio of time spent for running software interrupt requests on the CPU. This is a system-wide metric, it includes all the " + "processes on the host machine, not just clickhouse-server." + " A high number of this metric may indicate inefficient software running on the system." + " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across " + "them [0..num cores]."}; + new_values["OSStealTime" + cpu_suffix] + = {delta_values.steal * multiplier, + "The ratio of time spent in other operating systems by the CPU when running in a virtualized environment. This is a system-wide " + "metric, it includes all the processes on the host machine, not just clickhouse-server." + " Not every virtualized environments present this metric, and most of them don't." + " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across " + "them [0..num cores]."}; + new_values["OSGuestTime" + cpu_suffix] + = {delta_values.guest * multiplier, + "The ratio of time spent running a virtual CPU for guest operating systems under the control of the Linux kernel (See `man " + "procfs`). This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." + " This metric is irrelevant for ClickHouse, but still exists for completeness." + " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across " + "them [0..num cores]."}; + new_values["OSGuestNiceTime" + cpu_suffix] + = {delta_values.guest_nice * multiplier, + "The ratio of time spent running a virtual CPU for guest operating systems under the control of the Linux kernel, when a guest " + "was set to a higher priority (See `man procfs`). This is a system-wide metric, it includes all the processes on the host " + "machine, not just clickhouse-server." + " This metric is irrelevant for ClickHouse, but still exists for completeness." + " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across " + "them [0..num cores]."}; +} + +void AsynchronousMetrics::applyNormalizedCPUMetricsUpdate( + AsynchronousMetricValues & new_values, double num_cpus_to_normalize, const ProcStatValuesCPU & delta_values_all_cpus, double multiplier) +{ + chassert(num_cpus_to_normalize); + + new_values["OSUserTimeNormalized"] + = {delta_values_all_cpus.user * multiplier / num_cpus_to_normalize, + "The value is similar to `OSUserTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless " + "of the number of cores." + " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is " + "non-uniform, and still get the average resource utilization metric."}; + new_values["OSNiceTimeNormalized"] + = {delta_values_all_cpus.nice * multiplier / num_cpus_to_normalize, + "The value is similar to `OSNiceTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless " + "of the number of cores." + " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is " + "non-uniform, and still get the average resource utilization metric."}; + new_values["OSSystemTimeNormalized"] + = {delta_values_all_cpus.system * multiplier / num_cpus_to_normalize, + "The value is similar to `OSSystemTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless " + "of the number of cores." + " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is " + "non-uniform, and still get the average resource utilization metric."}; + new_values["OSIdleTimeNormalized"] + = {delta_values_all_cpus.idle * multiplier / num_cpus_to_normalize, + "The value is similar to `OSIdleTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless " + "of the number of cores." + " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is " + "non-uniform, and still get the average resource utilization metric."}; + new_values["OSIOWaitTimeNormalized"] + = {delta_values_all_cpus.iowait * multiplier / num_cpus_to_normalize, + "The value is similar to `OSIOWaitTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless " + "of the number of cores." + " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is " + "non-uniform, and still get the average resource utilization metric."}; + new_values["OSIrqTimeNormalized"] + = {delta_values_all_cpus.irq * multiplier / num_cpus_to_normalize, + "The value is similar to `OSIrqTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of " + "the number of cores." + " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is " + "non-uniform, and still get the average resource utilization metric."}; + new_values["OSSoftIrqTimeNormalized"] + = {delta_values_all_cpus.softirq * multiplier / num_cpus_to_normalize, + "The value is similar to `OSSoftIrqTime` but divided to the number of CPU cores to be measured in the [0..1] interval " + "regardless of the number of cores." + " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is " + "non-uniform, and still get the average resource utilization metric."}; + new_values["OSStealTimeNormalized"] + = {delta_values_all_cpus.steal * multiplier / num_cpus_to_normalize, + "The value is similar to `OSStealTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless " + "of the number of cores." + " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is " + "non-uniform, and still get the average resource utilization metric."}; + new_values["OSGuestTimeNormalized"] + = {delta_values_all_cpus.guest * multiplier / num_cpus_to_normalize, + "The value is similar to `OSGuestTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless " + "of the number of cores." + " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is " + "non-uniform, and still get the average resource utilization metric."}; + new_values["OSGuestNiceTimeNormalized"] + = {delta_values_all_cpus.guest_nice * multiplier / num_cpus_to_normalize, + "The value is similar to `OSGuestNiceTime` but divided to the number of CPU cores to be measured in the [0..1] interval " + "regardless of the number of cores." + " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is " + "non-uniform, and still get the average resource utilization metric."}; +} +#endif + void AsynchronousMetrics::update(TimePoint update_time, bool force_update) { Stopwatch watch; @@ -831,7 +997,68 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update) new_values["CGroupMaxCPU"] = { max_cpu_cgroups, "The maximum number of CPU cores according to CGroups."}; } - if (proc_stat) + if (cgroupcpu_stat || cgroupcpuacct_stat) + { + try + { + ReadBufferFromFilePRead & in = cgroupcpu_stat ? *cgroupcpu_stat : *cgroupcpuacct_stat; + ProcStatValuesCPU current_values{}; + + /// We re-read the file from the beginning each time + in.rewind(); + + while (!in.eof()) + { + String name; + readStringUntilWhitespace(name, in); + skipWhitespaceIfAny(in); + + /// `user_usec` for cgroup v2 and `user` for cgroup v1 + if (name.starts_with("user")) + { + readText(current_values.user, in); + skipToNextLineOrEOF(in); + } + /// `system_usec` for cgroup v2 and `system` for cgroup v1 + else if (name.starts_with("system")) + { + readText(current_values.system, in); + skipToNextLineOrEOF(in); + } + else + skipToNextLineOrEOF(in); + } + + if (!first_run) + { + auto get_clock_ticks = [&]() + { + if (auto hz = sysconf(_SC_CLK_TCK); hz != -1) + return hz; + else + throw ErrnoException(ErrorCodes::CANNOT_SYSCONF, "Cannot call 'sysconf' to obtain system HZ"); + }; + const auto cgroup_version_specific_divisor = cgroupcpu_stat ? 1e6 : get_clock_ticks(); + const double multiplier = 1.0 / cgroup_version_specific_divisor + / (std::chrono::duration_cast(time_since_previous_update).count() / 1e9); + + const ProcStatValuesCPU delta_values = current_values - proc_stat_values_all_cpus; + applyCPUMetricsUpdate(new_values, /*cpu_suffix=*/"", delta_values, multiplier); + if (max_cpu_cgroups > 0) + applyNormalizedCPUMetricsUpdate(new_values, max_cpu_cgroups, delta_values, multiplier); + } + + proc_stat_values_all_cpus = current_values; + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + openCgroupv2MetricFile("cpu.stat", cgroupcpu_stat); + if (!cgroupcpu_stat) + openFileIfExists("/sys/fs/cgroup/cpuacct/cpuacct.stat", cgroupcpuacct_stat); + } + } + else if (proc_stat) { try { @@ -886,43 +1113,7 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update) else delta_values_all_cpus = delta_values; - new_values["OSUserTime" + cpu_suffix] = { delta_values.user * multiplier, - "The ratio of time the CPU core was running userspace code. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." - " This includes also the time when the CPU was under-utilized due to the reasons internal to the CPU (memory loads, pipeline stalls, branch mispredictions, running another SMT core)." - " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."}; - new_values["OSNiceTime" + cpu_suffix] = { delta_values.nice * multiplier, - "The ratio of time the CPU core was running userspace code with higher priority. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." - " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."}; - new_values["OSSystemTime" + cpu_suffix] = { delta_values.system * multiplier, - "The ratio of time the CPU core was running OS kernel (system) code. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." - " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."}; - new_values["OSIdleTime" + cpu_suffix] = { delta_values.idle * multiplier, - "The ratio of time the CPU core was idle (not even ready to run a process waiting for IO) from the OS kernel standpoint. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." - " This does not include the time when the CPU was under-utilized due to the reasons internal to the CPU (memory loads, pipeline stalls, branch mispredictions, running another SMT core)." - " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."}; - new_values["OSIOWaitTime" + cpu_suffix] = { delta_values.iowait * multiplier, - "The ratio of time the CPU core was not running the code but when the OS kernel did not run any other process on this CPU as the processes were waiting for IO. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." - " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."}; - new_values["OSIrqTime" + cpu_suffix] = { delta_values.irq * multiplier, - "The ratio of time spent for running hardware interrupt requests on the CPU. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." - " A high number of this metric may indicate hardware misconfiguration or a very high network load." - " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."}; - new_values["OSSoftIrqTime" + cpu_suffix] = { delta_values.softirq * multiplier, - "The ratio of time spent for running software interrupt requests on the CPU. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." - " A high number of this metric may indicate inefficient software running on the system." - " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."}; - new_values["OSStealTime" + cpu_suffix] = { delta_values.steal * multiplier, - "The ratio of time spent in other operating systems by the CPU when running in a virtualized environment. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." - " Not every virtualized environments present this metric, and most of them don't." - " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."}; - new_values["OSGuestTime" + cpu_suffix] = { delta_values.guest * multiplier, - "The ratio of time spent running a virtual CPU for guest operating systems under the control of the Linux kernel (See `man procfs`). This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." - " This metric is irrelevant for ClickHouse, but still exists for completeness." - " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."}; - new_values["OSGuestNiceTime" + cpu_suffix] = { delta_values.guest_nice * multiplier, - "The ratio of time spent running a virtual CPU for guest operating systems under the control of the Linux kernel, when a guest was set to a higher priority (See `man procfs`). This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." - " This metric is irrelevant for ClickHouse, but still exists for completeness." - " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."}; + applyCPUMetricsUpdate(new_values, cpu_suffix, delta_values, multiplier); } prev_values = current_values; @@ -978,38 +1169,7 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update) Float64 num_cpus_to_normalize = max_cpu_cgroups > 0 ? max_cpu_cgroups : num_cpus; if (num_cpus_to_normalize > 0) - { - new_values["OSUserTimeNormalized"] = { delta_values_all_cpus.user * multiplier / num_cpus_to_normalize, - "The value is similar to `OSUserTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores." - " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."}; - new_values["OSNiceTimeNormalized"] = { delta_values_all_cpus.nice * multiplier / num_cpus_to_normalize, - "The value is similar to `OSNiceTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores." - " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."}; - new_values["OSSystemTimeNormalized"] = { delta_values_all_cpus.system * multiplier / num_cpus_to_normalize, - "The value is similar to `OSSystemTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores." - " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."}; - new_values["OSIdleTimeNormalized"] = { delta_values_all_cpus.idle * multiplier / num_cpus_to_normalize, - "The value is similar to `OSIdleTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores." - " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."}; - new_values["OSIOWaitTimeNormalized"] = { delta_values_all_cpus.iowait * multiplier / num_cpus_to_normalize, - "The value is similar to `OSIOWaitTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores." - " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."}; - new_values["OSIrqTimeNormalized"] = { delta_values_all_cpus.irq * multiplier / num_cpus_to_normalize, - "The value is similar to `OSIrqTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores." - " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."}; - new_values["OSSoftIrqTimeNormalized"] = { delta_values_all_cpus.softirq * multiplier / num_cpus_to_normalize, - "The value is similar to `OSSoftIrqTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores." - " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."}; - new_values["OSStealTimeNormalized"] = { delta_values_all_cpus.steal * multiplier / num_cpus_to_normalize, - "The value is similar to `OSStealTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores." - " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."}; - new_values["OSGuestTimeNormalized"] = { delta_values_all_cpus.guest * multiplier / num_cpus_to_normalize, - "The value is similar to `OSGuestTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores." - " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."}; - new_values["OSGuestNiceTimeNormalized"] = { delta_values_all_cpus.guest_nice * multiplier / num_cpus_to_normalize, - "The value is similar to `OSGuestNiceTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores." - " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."}; - } + applyNormalizedCPUMetricsUpdate(new_values, num_cpus_to_normalize, delta_values_all_cpus, multiplier); } proc_stat_values_other = current_other_values; @@ -1042,8 +1202,7 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update) tryLogCurrentException(__PRETTY_FUNCTION__); } } - - if (meminfo) + else if (meminfo) { try { diff --git a/src/Common/AsynchronousMetrics.h b/src/Common/AsynchronousMetrics.h index 04d0319e35b..78d07ef4b6c 100644 --- a/src/Common/AsynchronousMetrics.h +++ b/src/Common/AsynchronousMetrics.h @@ -126,6 +126,8 @@ private: std::optional cgroupcpu_cfs_period TSA_GUARDED_BY(data_mutex); std::optional cgroupcpu_cfs_quota TSA_GUARDED_BY(data_mutex); std::optional cgroupcpu_max TSA_GUARDED_BY(data_mutex); + std::optional cgroupcpu_stat TSA_GUARDED_BY(data_mutex); + std::optional cgroupcpuacct_stat TSA_GUARDED_BY(data_mutex); std::optional vm_max_map_count TSA_GUARDED_BY(data_mutex); std::optional vm_maps TSA_GUARDED_BY(data_mutex); @@ -221,6 +223,16 @@ private: void openBlockDevices(); void openSensorsChips(); void openEDAC(); + + void applyCPUMetricsUpdate( + AsynchronousMetricValues & new_values, const std::string & cpu_suffix, const ProcStatValuesCPU & delta_values, double multiplier); + + void applyNormalizedCPUMetricsUpdate( + AsynchronousMetricValues & new_values, + double num_cpus_to_normalize, + const ProcStatValuesCPU & delta_values_all_cpus, + double multiplier); + #endif void run(); diff --git a/src/Common/CacheBase.h b/src/Common/CacheBase.h index a809136f451..23e6a6fc91c 100644 --- a/src/Common/CacheBase.h +++ b/src/Common/CacheBase.h @@ -197,6 +197,12 @@ public: cache_policy->remove(key); } + void remove(std::function predicate) + { + std::lock_guard lock(mutex); + cache_policy->remove(predicate); + } + size_t sizeInBytes() const { std::lock_guard lock(mutex); diff --git a/src/Common/CgroupsMemoryUsageObserver.cpp b/src/Common/CgroupsMemoryUsageObserver.cpp index ef8bdfc1823..83b04360164 100644 --- a/src/Common/CgroupsMemoryUsageObserver.cpp +++ b/src/Common/CgroupsMemoryUsageObserver.cpp @@ -144,31 +144,6 @@ private: /// - I did not test what happens if a host has v1 and v2 simultaneously enabled. I believe such /// systems existed only for a short transition period. -std::optional getCgroupsV2Path() -{ - if (!cgroupsV2Enabled()) - return {}; - - if (!cgroupsV2MemoryControllerEnabled()) - return {}; - - fs::path current_cgroup = cgroupV2PathOfProcess(); - if (current_cgroup.empty()) - return {}; - - /// Return the bottom-most nested current memory file. If there is no such file at the current - /// level, try again at the parent level as memory settings are inherited. - while (current_cgroup != default_cgroups_mount.parent_path()) - { - const auto current_path = current_cgroup / "memory.current"; - const auto stat_path = current_cgroup / "memory.stat"; - if (fs::exists(current_path) && fs::exists(stat_path)) - return {current_cgroup}; - current_cgroup = current_cgroup.parent_path(); - } - return {}; -} - std::optional getCgroupsV1Path() { auto path = default_cgroups_mount / "memory/memory.stat"; @@ -179,7 +154,7 @@ std::optional getCgroupsV1Path() std::pair getCgroupsPath() { - auto v2_path = getCgroupsV2Path(); + auto v2_path = getCgroupsV2PathContainingFile("memory.current"); if (v2_path.has_value()) return {*v2_path, CgroupsMemoryUsageObserver::CgroupsVersion::V2}; diff --git a/src/Common/CollectionOfDerived.h b/src/Common/CollectionOfDerived.h index 9f80ff727b4..bcbcc36c67a 100644 --- a/src/Common/CollectionOfDerived.h +++ b/src/Common/CollectionOfDerived.h @@ -168,7 +168,7 @@ private: records.emplace(it, type_idx, item); } - Records::const_iterator getImpl(std::type_index type_idx) const + typename Records::const_iterator getImpl(std::type_index type_idx) const { auto it = std::lower_bound(records.cbegin(), records.cend(), type_idx); diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index b6dd14d292c..67890568941 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -307,7 +307,7 @@ M(FilteringMarksWithPrimaryKey, "Number of threads currently doing filtering of mark ranges by the primary key") \ M(FilteringMarksWithSecondaryKeys, "Number of threads currently doing filtering of mark ranges by secondary keys") \ \ - M(S3DiskNoKeyErrors, "The number of `NoSuchKey` errors that occur when reading data from S3 cloud storage through ClickHouse disks.") \ + M(DiskS3NoSuchKeyErrors, "The number of `NoSuchKey` errors that occur when reading data from S3 cloud storage through ClickHouse disks.") \ #ifdef APPLY_FOR_EXTERNAL_METRICS #define APPLY_FOR_METRICS(M) APPLY_FOR_BUILTIN_METRICS(M) APPLY_FOR_EXTERNAL_METRICS(M) diff --git a/src/Common/DNSResolver.cpp b/src/Common/DNSResolver.cpp index 4b577a251af..68a8fa7d74c 100644 --- a/src/Common/DNSResolver.cpp +++ b/src/Common/DNSResolver.cpp @@ -12,6 +12,7 @@ #include #include #include +#include "Common/MultiVersion.h" #include #include "DNSPTRResolverProvider.h" @@ -139,12 +140,6 @@ DNSResolver::IPAddresses resolveIPAddressImpl(const std::string & host) return addresses; } -DNSResolver::IPAddresses resolveIPAddressWithCache(CacheBase & cache, const std::string & host) -{ - auto [result, _ ] = cache.getOrSet(host, [&host]() {return std::make_shared(resolveIPAddressImpl(host), std::chrono::system_clock::now());}); - return result->addresses; -} - std::unordered_set reverseResolveImpl(const Poco::Net::IPAddress & address) { auto ptr_resolver = DB::DNSPTRResolverProvider::get(); @@ -198,21 +193,89 @@ struct DNSResolver::Impl std::atomic disable_cache{false}; }; +struct DNSResolver::AddressFilter +{ + struct DNSFilterSettings + { + bool dns_allow_resolve_names_to_ipv4{true}; + bool dns_allow_resolve_names_to_ipv6{true}; + }; -DNSResolver::DNSResolver() : impl(std::make_unique()), log(getLogger("DNSResolver")) {} + AddressFilter() : settings(std::make_unique()) {} + + void performAddressFiltering(DNSResolver::IPAddresses & addresses) const + { + const auto current_settings = settings.get(); + bool dns_resolve_ipv4 = current_settings->dns_allow_resolve_names_to_ipv4; + bool dns_resolve_ipv6 = current_settings->dns_allow_resolve_names_to_ipv6; + + if (dns_resolve_ipv4 && dns_resolve_ipv6) + { + return; + } + if (!dns_resolve_ipv4 && !dns_resolve_ipv6) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "DNS can't resolve any address, because dns_resolve_ipv6_interfaces and dns_resolve_ipv4_interfaces both are disabled"); + } + + std::erase_if(addresses, [dns_resolve_ipv6, dns_resolve_ipv4](const Poco::Net::IPAddress& address) + { + return (address.family() == Poco::Net::IPAddress::IPv6 && !dns_resolve_ipv6) + || (address.family() == Poco::Net::IPAddress::IPv4 && !dns_resolve_ipv4); + }); + } + + void setSettings(bool dns_allow_resolve_names_to_ipv4, bool dns_allow_resolve_names_to_ipv6) + { + settings.set(std::make_unique(dns_allow_resolve_names_to_ipv4, dns_allow_resolve_names_to_ipv6)); + } + + MultiVersion settings; +}; + + +DNSResolver::DNSResolver() + : impl(std::make_unique()) + , addressFilter(std::make_unique()) + , log(getLogger("DNSResolver")) {} + + +DNSResolver::IPAddresses DNSResolver::getResolvedIPAdressessWithFiltering(const std::string & host) +{ + auto addresses = resolveIPAddressImpl(host); + addressFilter->performAddressFiltering(addresses); + + if (addresses.empty()) + { + ProfileEvents::increment(ProfileEvents::DNSError); + throw DB::NetException(ErrorCodes::DNS_ERROR, "After filtering there are no resolved address for host({}).", host); + } + return addresses; +} + +DNSResolver::IPAddresses DNSResolver::resolveIPAddressWithCache(const std::string & host) +{ + auto [result, _ ] = impl->cache_host.getOrSet(host, [&host, this]() {return std::make_shared(getResolvedIPAdressessWithFiltering(host), std::chrono::system_clock::now());}); + return result->addresses; +} Poco::Net::IPAddress DNSResolver::resolveHost(const std::string & host) { return pickAddress(resolveHostAll(host)); // random order -> random pick } +void DNSResolver::setFilterSettings(bool dns_allow_resolve_names_to_ipv4, bool dns_allow_resolve_names_to_ipv6) +{ + addressFilter->setSettings(dns_allow_resolve_names_to_ipv4, dns_allow_resolve_names_to_ipv6); +} + DNSResolver::IPAddresses DNSResolver::resolveHostAllInOriginOrder(const std::string & host) { if (impl->disable_cache) - return resolveIPAddressImpl(host); + return getResolvedIPAdressessWithFiltering(host); addToNewHosts(host); - return resolveIPAddressWithCache(impl->cache_host, host); + return resolveIPAddressWithCache(host); } DNSResolver::IPAddresses DNSResolver::resolveHostAll(const std::string & host) @@ -232,7 +295,7 @@ Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host_an splitHostAndPort(host_and_port, host, port); addToNewHosts(host); - return Poco::Net::SocketAddress(pickAddress(resolveIPAddressWithCache(impl->cache_host, host)), port); + return Poco::Net::SocketAddress(pickAddress(resolveIPAddressWithCache(host)), port); } Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host, UInt16 port) @@ -241,7 +304,7 @@ Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host, U return Poco::Net::SocketAddress(host, port); addToNewHosts(host); - return Poco::Net::SocketAddress(pickAddress(resolveIPAddressWithCache(impl->cache_host, host)), port); + return Poco::Net::SocketAddress(pickAddress(resolveIPAddressWithCache(host)), port); } std::vector DNSResolver::resolveAddressList(const std::string & host, UInt16 port) @@ -254,7 +317,7 @@ std::vector DNSResolver::resolveAddressList(const std: if (!impl->disable_cache) addToNewHosts(host); - std::vector ips = impl->disable_cache ? hostByName(host) : resolveIPAddressWithCache(impl->cache_host, host); + std::vector ips = impl->disable_cache ? hostByName(host) : resolveIPAddressWithCache(host); auto ips_end = std::unique(ips.begin(), ips.end()); addresses.reserve(ips_end - ips.begin()); @@ -419,8 +482,8 @@ bool DNSResolver::updateCache(UInt32 max_consecutive_failures) bool DNSResolver::updateHost(const String & host) { - const auto old_value = resolveIPAddressWithCache(impl->cache_host, host); - auto new_value = resolveIPAddressImpl(host); + const auto old_value = resolveIPAddressWithCache(host); + auto new_value = getResolvedIPAdressessWithFiltering(host); const bool result = old_value != new_value; impl->cache_host.set(host, std::make_shared(std::move(new_value), std::chrono::system_clock::now())); return result; diff --git a/src/Common/DNSResolver.h b/src/Common/DNSResolver.h index 1ddd9d3b991..b35f55dfcd2 100644 --- a/src/Common/DNSResolver.h +++ b/src/Common/DNSResolver.h @@ -68,6 +68,8 @@ public: /// Returns true if IP of any host has been changed or an element was dropped (too many failures) bool updateCache(UInt32 max_consecutive_failures); + void setFilterSettings(bool dns_allow_resolve_names_to_ipv4, bool dns_allow_resolve_names_to_ipv6); + /// Returns a copy of cache entries std::vector> cacheEntries() const; @@ -86,6 +88,10 @@ private: struct Impl; std::unique_ptr impl; + + struct AddressFilter; + std::unique_ptr addressFilter; + LoggerPtr log; /// Updates cached value and returns true it has been changed. @@ -94,6 +100,9 @@ private: void addToNewHosts(const String & host); void addToNewAddresses(const Poco::Net::IPAddress & address); + + IPAddresses resolveIPAddressWithCache(const std::string & host); + IPAddresses getResolvedIPAdressessWithFiltering(const std::string & host); }; } diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index b1b8e2367a4..1055b3d34db 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -604,6 +604,11 @@ M(723, PARQUET_EXCEPTION) \ M(724, TOO_MANY_TABLES) \ M(725, TOO_MANY_DATABASES) \ + M(726, UNEXPECTED_HTTP_HEADERS) \ + M(727, UNEXPECTED_TABLE_ENGINE) \ + M(728, UNEXPECTED_DATA_TYPE) \ + M(729, ILLEGAL_TIME_SERIES_TAGS) \ + M(730, REFRESH_FAILED) \ \ M(900, DISTRIBUTED_CACHE_ERROR) \ M(901, CANNOT_USE_DISTRIBUTED_CACHE) \ diff --git a/src/Common/FailPoint.cpp b/src/Common/FailPoint.cpp index f5ec8cf0356..b2fcbc77c56 100644 --- a/src/Common/FailPoint.cpp +++ b/src/Common/FailPoint.cpp @@ -7,6 +7,8 @@ #include #include +#include "config.h" + namespace DB { @@ -15,7 +17,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; }; -#if FIU_ENABLE +#if USE_LIBFIU static struct InitFiu { InitFiu() @@ -60,6 +62,7 @@ static struct InitFiu ONCE(receive_timeout_on_table_status_response) \ REGULAR(keepermap_fail_drop_data) \ REGULAR(lazy_pipe_fds_fail_close) \ + PAUSEABLE(infinite_sleep) \ namespace FailPoints @@ -134,7 +137,7 @@ void FailPointInjection::pauseFailPoint(const String & fail_point_name) void FailPointInjection::enableFailPoint(const String & fail_point_name) { -#if FIU_ENABLE +#if USE_LIBFIU #define SUB_M(NAME, flags, pause) \ if (fail_point_name == FailPoints::NAME) \ { \ diff --git a/src/Common/FailPoint.h b/src/Common/FailPoint.h index b3e1214d597..1af13d08553 100644 --- a/src/Common/FailPoint.h +++ b/src/Common/FailPoint.h @@ -1,17 +1,16 @@ #pragma once -#include "config.h" #include #include #include +#include "config.h" + #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wdocumentation" #pragma clang diagnostic ignored "-Wreserved-macro-identifier" - -#include -#include - +# include +# include #pragma clang diagnostic pop #include diff --git a/src/Common/FieldBinaryEncoding.cpp b/src/Common/FieldBinaryEncoding.cpp index 6c1a8496fe6..23263c988c3 100644 --- a/src/Common/FieldBinaryEncoding.cpp +++ b/src/Common/FieldBinaryEncoding.cpp @@ -208,7 +208,7 @@ void FieldVisitorEncodeBinary::operator() (const Map & x, WriteBuffer & buf) con writeVarUInt(size, buf); for (size_t i = 0; i < size; ++i) { - const Tuple & key_and_value = x[i].get(); + const Tuple & key_and_value = x[i].safeGet(); Field::dispatch([&buf] (const auto & value) { FieldVisitorEncodeBinary()(value, buf); }, key_and_value[0]); Field::dispatch([&buf] (const auto & value) { FieldVisitorEncodeBinary()(value, buf); }, key_and_value[1]); } diff --git a/src/Common/FieldVisitorSum.cpp b/src/Common/FieldVisitorSum.cpp index b825f188586..af9503ac046 100644 --- a/src/Common/FieldVisitorSum.cpp +++ b/src/Common/FieldVisitorSum.cpp @@ -19,7 +19,7 @@ bool FieldVisitorSum::operator() (UInt64 & x) const return x != 0; } -bool FieldVisitorSum::operator() (Float64 & x) const { x += rhs.get(); return x != 0; } +bool FieldVisitorSum::operator() (Float64 & x) const { x += rhs.safeGet(); return x != 0; } bool FieldVisitorSum::operator() (Null &) const { diff --git a/src/Common/FieldVisitorSum.h b/src/Common/FieldVisitorSum.h index cbb4c4a1de3..d28676b5093 100644 --- a/src/Common/FieldVisitorSum.h +++ b/src/Common/FieldVisitorSum.h @@ -37,7 +37,7 @@ public: template bool operator() (DecimalField & x) const { - x += rhs.get>(); + x += rhs.safeGet>(); return x.getValue() != T(0); } diff --git a/src/Common/FieldVisitorToString.cpp b/src/Common/FieldVisitorToString.cpp index c4cb4266418..2148bac20d1 100644 --- a/src/Common/FieldVisitorToString.cpp +++ b/src/Common/FieldVisitorToString.cpp @@ -172,7 +172,7 @@ String FieldVisitorToString::operator() (const Object & x) const String convertFieldToString(const Field & field) { if (field.getType() == Field::Types::Which::String) - return field.get(); + return field.safeGet(); return applyVisitor(FieldVisitorToString(), field); } diff --git a/src/Common/HashTable/HashMap.h b/src/Common/HashTable/HashMap.h index a26797a687a..92621db5558 100644 --- a/src/Common/HashTable/HashMap.h +++ b/src/Common/HashTable/HashMap.h @@ -297,7 +297,7 @@ public: } /// Only inserts the value if key isn't already present - void ALWAYS_INLINE insertIfNotPresent(const Key & x, const Cell::Mapped & value) + void ALWAYS_INLINE insertIfNotPresent(const Key & x, const typename Cell::Mapped & value) { LookupResult it; bool inserted; diff --git a/src/Common/ICachePolicy.h b/src/Common/ICachePolicy.h index 301a5c6cbbd..567fa35d977 100644 --- a/src/Common/ICachePolicy.h +++ b/src/Common/ICachePolicy.h @@ -55,6 +55,7 @@ public: virtual void set(const Key & key, const MappedPtr & mapped) = 0; virtual void remove(const Key & key) = 0; + virtual void remove(std::function predicate) = 0; virtual void clear() = 0; virtual std::vector dump() const = 0; diff --git a/src/Common/LRUCachePolicy.h b/src/Common/LRUCachePolicy.h index f833e46a821..cb8fdbd2b9c 100644 --- a/src/Common/LRUCachePolicy.h +++ b/src/Common/LRUCachePolicy.h @@ -79,6 +79,22 @@ public: cells.erase(it); } + void remove(std::function predicate) override + { + for (auto it = cells.begin(); it != cells.end();) + { + if (predicate(it->first, it->second.value)) + { + Cell & cell = it->second; + current_size_in_bytes -= cell.size; + queue.erase(cell.queue_iterator); + it = cells.erase(it); + } + else + ++it; + } + } + MappedPtr get(const Key & key) override { auto it = cells.find(key); diff --git a/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp b/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp index 36191b89e86..e9f7816ce73 100644 --- a/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp +++ b/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp @@ -6,14 +6,18 @@ #include #include #include -#include +#include #include +#include +#include +#include #include #include #include #include #include #include +#include namespace fs = std::filesystem; @@ -26,6 +30,7 @@ namespace ErrorCodes extern const int INVALID_CONFIG_PARAMETER; extern const int BAD_ARGUMENTS; extern const int LOGICAL_ERROR; + extern const int SUPPORT_IS_DISABLED; } static const std::string named_collections_storage_config_path = "named_collections_storage"; @@ -74,9 +79,9 @@ public: }; -class NamedCollectionsMetadataStorage::LocalStorage : public INamedCollectionsStorage, private WithContext +class NamedCollectionsMetadataStorage::LocalStorage : public INamedCollectionsStorage, protected WithContext { -private: +protected: std::string root_path; public: @@ -126,6 +131,11 @@ public: ReadBufferFromFile in(getPath(file_name)); std::string data; readStringUntilEOF(data, in); + return readHook(data); + } + + virtual std::string readHook(const std::string & data) const + { return data; } @@ -142,8 +152,9 @@ public: fs::create_directories(root_path); auto tmp_path = getPath(file_name + ".tmp"); - WriteBufferFromFile out(tmp_path, data.size(), O_WRONLY | O_CREAT | O_EXCL); - writeString(data, out); + auto write_data = writeHook(data); + WriteBufferFromFile out(tmp_path, write_data.size(), O_WRONLY | O_CREAT | O_EXCL); + writeString(write_data, out); out.next(); if (getContext()->getSettingsRef().fsync_metadata) @@ -153,6 +164,11 @@ public: fs::rename(tmp_path, getPath(file_name)); } + virtual std::string writeHook(const std::string & data) const + { + return data; + } + void remove(const std::string & file_name) override { if (!removeIfExists(file_name)) @@ -168,7 +184,7 @@ public: return fs::remove(getPath(file_name)); } -private: +protected: std::string getPath(const std::string & file_name) const { const auto file_name_as_path = fs::path(file_name); @@ -178,6 +194,7 @@ private: return fs::path(root_path) / file_name_as_path; } +private: /// Delete .tmp files. They could be left undeleted in case of /// some exception or abrupt server restart. void cleanup() @@ -194,8 +211,7 @@ private: } }; - -class NamedCollectionsMetadataStorage::ZooKeeperStorage : public INamedCollectionsStorage, private WithContext +class NamedCollectionsMetadataStorage::ZooKeeperStorage : public INamedCollectionsStorage, protected WithContext { private: std::string root_path; @@ -275,18 +291,25 @@ public: std::string read(const std::string & file_name) const override { - return getClient()->get(getPath(file_name)); + auto data = getClient()->get(getPath(file_name)); + return readHook(data); + } + + virtual std::string readHook(const std::string & data) const + { + return data; } void write(const std::string & file_name, const std::string & data, bool replace) override { + auto write_data = writeHook(data); if (replace) { - getClient()->createOrUpdate(getPath(file_name), data, zkutil::CreateMode::Persistent); + getClient()->createOrUpdate(getPath(file_name), write_data, zkutil::CreateMode::Persistent); } else { - auto code = getClient()->tryCreate(getPath(file_name), data, zkutil::CreateMode::Persistent); + auto code = getClient()->tryCreate(getPath(file_name), write_data, zkutil::CreateMode::Persistent); if (code == Coordination::Error::ZNODEEXISTS) { @@ -298,6 +321,11 @@ public: } } + virtual std::string writeHook(const std::string & data) const + { + return data; + } + void remove(const std::string & file_name) override { getClient()->remove(getPath(file_name)); @@ -334,6 +362,93 @@ private: } }; +#if USE_SSL + +template +class NamedCollectionsMetadataStorageEncrypted : public BaseMetadataStorage +{ +public: + NamedCollectionsMetadataStorageEncrypted(ContextPtr context_, const std::string & path_) + : BaseMetadataStorage(context_, path_) + { + const auto & config = BaseMetadataStorage::getContext()->getConfigRef(); + auto key_hex = config.getRawString("named_collections_storage.key_hex", ""); + try + { + key = boost::algorithm::unhex(key_hex); + key_fingerprint = FileEncryption::calculateKeyFingerprint(key); + } + catch (const std::exception &) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot read key_hex, check for valid characters [0-9a-fA-F] and length"); + } + + algorithm = FileEncryption::parseAlgorithmFromString(config.getString("named_collections_storage.algorithm", "aes_128_ctr")); + } + + std::string readHook(const std::string & data) const override + { + ReadBufferFromString in(data); + Memory<> encrypted_buffer(data.length()); + + FileEncryption::Header header; + try + { + header.read(in); + } + catch (Exception & e) + { + e.addMessage("While reading the header of encrypted data"); + throw; + } + + size_t bytes_read = 0; + while (bytes_read < encrypted_buffer.size() && !in.eof()) + { + bytes_read += in.read(encrypted_buffer.data() + bytes_read, encrypted_buffer.size() - bytes_read); + } + + std::string decrypted_buffer; + decrypted_buffer.resize(bytes_read); + FileEncryption::Encryptor encryptor(header.algorithm, key, header.init_vector); + encryptor.decrypt(encrypted_buffer.data(), bytes_read, decrypted_buffer.data()); + + return decrypted_buffer; + } + + std::string writeHook(const std::string & data) const override + { + FileEncryption::Header header{ + .algorithm = algorithm, + .key_fingerprint = key_fingerprint, + .init_vector = FileEncryption::InitVector::random() + }; + + FileEncryption::Encryptor encryptor(header.algorithm, key, header.init_vector); + WriteBufferFromOwnString out; + header.write(out); + encryptor.encrypt(data.data(), data.size(), out); + return std::string(out.str()); + } + +private: + std::string key; + UInt128 key_fingerprint; + FileEncryption::Algorithm algorithm; +}; + +class NamedCollectionsMetadataStorage::LocalStorageEncrypted : public NamedCollectionsMetadataStorageEncrypted +{ + using NamedCollectionsMetadataStorageEncrypted::NamedCollectionsMetadataStorageEncrypted; +}; + +class NamedCollectionsMetadataStorage::ZooKeeperStorageEncrypted : public NamedCollectionsMetadataStorageEncrypted +{ + using NamedCollectionsMetadataStorageEncrypted::NamedCollectionsMetadataStorageEncrypted; +}; + +#endif + NamedCollectionsMetadataStorage::NamedCollectionsMetadataStorage( std::shared_ptr storage_, ContextPtr context_) @@ -495,7 +610,7 @@ std::unique_ptr NamedCollectionsMetadataStorage const auto & config = context_->getConfigRef(); const auto storage_type = config.getString(named_collections_storage_config_path + ".type", "local"); - if (storage_type == "local") + if (storage_type == "local" || storage_type == "local_encrypted") { const auto path = config.getString( named_collections_storage_config_path + ".path", @@ -504,14 +619,36 @@ std::unique_ptr NamedCollectionsMetadataStorage LOG_TRACE(getLogger("NamedCollectionsMetadataStorage"), "Using local storage for named collections at path: {}", path); - auto local_storage = std::make_unique(context_, path); + std::unique_ptr local_storage; + if (storage_type == "local") + local_storage = std::make_unique(context_, path); + else if (storage_type == "local_encrypted") + { +#if USE_SSL + local_storage = std::make_unique(context_, path); +#else + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Named collections encryption requires building with SSL support"); +#endif + } + return std::unique_ptr( new NamedCollectionsMetadataStorage(std::move(local_storage), context_)); } - if (storage_type == "zookeeper" || storage_type == "keeper") + if (storage_type == "zookeeper" || storage_type == "keeper" || storage_type == "zookeeper_encrypted" || storage_type == "keeper_encrypted") { const auto path = config.getString(named_collections_storage_config_path + ".path"); - auto zk_storage = std::make_unique(context_, path); + + std::unique_ptr zk_storage; + if (!storage_type.ends_with("_encrypted")) + zk_storage = std::make_unique(context_, path); + else + { +#if USE_SSL + zk_storage = std::make_unique(context_, path); +#else + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Named collections encryption requires building with SSL support"); +#endif + } LOG_TRACE(getLogger("NamedCollectionsMetadataStorage"), "Using zookeeper storage for named collections at path: {}", path); diff --git a/src/Common/NamedCollections/NamedCollectionsMetadataStorage.h b/src/Common/NamedCollections/NamedCollectionsMetadataStorage.h index c3468fbc468..52805e8359d 100644 --- a/src/Common/NamedCollections/NamedCollectionsMetadataStorage.h +++ b/src/Common/NamedCollections/NamedCollectionsMetadataStorage.h @@ -35,7 +35,9 @@ public: private: class INamedCollectionsStorage; class LocalStorage; + class LocalStorageEncrypted; class ZooKeeperStorage; + class ZooKeeperStorageEncrypted; std::shared_ptr storage; diff --git a/src/Common/OptimizedRegularExpression.cpp b/src/Common/OptimizedRegularExpression.cpp index 712cab80aff..2cdb3409487 100644 --- a/src/Common/OptimizedRegularExpression.cpp +++ b/src/Common/OptimizedRegularExpression.cpp @@ -244,33 +244,43 @@ const char * analyzeImpl( is_trivial = false; if (!in_square_braces) { - /// Check for case-insensitive flag. - if (pos + 1 < end && pos[1] == '?') + /// it means flag negation + /// there are various possible flags + /// actually only imsU are supported by re2 + auto is_flag_char = [](char x) { - for (size_t offset = 2; pos + offset < end; ++offset) + return x == '-' || x == 'i' || x == 'm' || x == 's' || x == 'U' || x == 'u'; + }; + /// Check for case-insensitive flag. + if (pos + 2 < end && pos[1] == '?' && is_flag_char(pos[2])) + { + size_t offset = 2; + for (; pos + offset < end; ++offset) { - if (pos[offset] == '-' /// it means flag negation - /// various possible flags, actually only imsU are supported by re2 - || (pos[offset] >= 'a' && pos[offset] <= 'z') - || (pos[offset] >= 'A' && pos[offset] <= 'Z')) + if (pos[offset] == 'i') { - if (pos[offset] == 'i') - { - /// Actually it can be negated case-insensitive flag. But we don't care. - has_case_insensitive_flag = true; - break; - } + /// Actually it can be negated case-insensitive flag. But we don't care. + has_case_insensitive_flag = true; } - else + else if (!is_flag_char(pos[offset])) break; } + pos += offset; + if (pos == end) + return pos; + /// if this group only contains flags, we have nothing to do. + if (*pos == ')') + { + ++pos; + break; + } } /// (?:regex) means non-capturing parentheses group - if (pos + 2 < end && pos[1] == '?' && pos[2] == ':') + else if (pos + 2 < end && pos[1] == '?' && pos[2] == ':') { pos += 2; } - if (pos + 3 < end && pos[1] == '?' && (pos[2] == '<' || pos[2] == '\'' || (pos[2] == 'P' && pos[3] == '<'))) + else if (pos + 3 < end && pos[1] == '?' && (pos[2] == '<' || pos[2] == '\'' || (pos[2] == 'P' && pos[3] == '<'))) { pos = skipNameCapturingGroup(pos, pos[2] == 'P' ? 3: 2, end); } diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index ccdce7ff584..d43d9fdcea8 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -209,8 +209,35 @@ \ M(Merge, "Number of launched background merges.") \ M(MergedRows, "Rows read for background merges. This is the number of rows before merge.") \ + M(MergedColumns, "Number of columns merged during the horizontal stage of merges.") \ + M(GatheredColumns, "Number of columns gathered during the vertical stage of merges.") \ M(MergedUncompressedBytes, "Uncompressed bytes (for columns as they stored in memory) that was read for background merges. This is the number before merge.") \ - M(MergesTimeMilliseconds, "Total time spent for background merges.")\ + M(MergeTotalMilliseconds, "Total time spent for background merges") \ + M(MergeExecuteMilliseconds, "Total busy time spent for execution of background merges") \ + M(MergeHorizontalStageTotalMilliseconds, "Total time spent for horizontal stage of background merges") \ + M(MergeHorizontalStageExecuteMilliseconds, "Total busy time spent for execution of horizontal stage of background merges") \ + M(MergeVerticalStageTotalMilliseconds, "Total time spent for vertical stage of background merges") \ + M(MergeVerticalStageExecuteMilliseconds, "Total busy time spent for execution of vertical stage of background merges") \ + M(MergeProjectionStageTotalMilliseconds, "Total time spent for projection stage of background merges") \ + M(MergeProjectionStageExecuteMilliseconds, "Total busy time spent for execution of projection stage of background merges") \ + \ + M(MergingSortedMilliseconds, "Total time spent while merging sorted columns") \ + M(AggregatingSortedMilliseconds, "Total time spent while aggregating sorted columns") \ + M(CollapsingSortedMilliseconds, "Total time spent while collapsing sorted columns") \ + M(ReplacingSortedMilliseconds, "Total time spent while replacing sorted columns") \ + M(SummingSortedMilliseconds, "Total time spent while summing sorted columns") \ + M(VersionedCollapsingSortedMilliseconds, "Total time spent while version collapsing sorted columns") \ + M(GatheringColumnMilliseconds, "Total time spent while gathering columns for vertical merge") \ + \ + M(MutationTotalParts, "Number of total parts for which mutations tried to be applied") \ + M(MutationUntouchedParts, "Number of total parts for which mutations tried to be applied but which was completely skipped according to predicate") \ + M(MutatedRows, "Rows read for mutations. This is the number of rows before mutation") \ + M(MutatedUncompressedBytes, "Uncompressed bytes (for columns as they stored in memory) that was read for mutations. This is the number before mutation.") \ + M(MutationTotalMilliseconds, "Total time spent for mutations.") \ + M(MutationExecuteMilliseconds, "Total busy time spent for execution of mutations.") \ + M(MutationAllPartColumns, "Number of times when task to mutate all columns in part was created") \ + M(MutationSomePartColumns, "Number of times when task to mutate some columns in part was created") \ + M(MutateTaskProjectionsCalculationMicroseconds, "Time spent calculating projections in mutations.") \ \ M(MergeTreeDataWriterRows, "Number of rows INSERTed to MergeTree tables.") \ M(MergeTreeDataWriterUncompressedBytes, "Uncompressed bytes (for columns as they stored in memory) INSERTed to MergeTree tables.") \ @@ -225,7 +252,6 @@ M(MergeTreeDataWriterProjectionsCalculationMicroseconds, "Time spent calculating projections") \ M(MergeTreeDataProjectionWriterSortingBlocksMicroseconds, "Time spent sorting blocks (for projection it might be a key different from table's sorting key)") \ M(MergeTreeDataProjectionWriterMergingBlocksMicroseconds, "Time spent merging blocks") \ - M(MutateTaskProjectionsCalculationMicroseconds, "Time spent calculating projections") \ \ M(InsertedWideParts, "Number of parts inserted in Wide format.") \ M(InsertedCompactParts, "Number of parts inserted in Compact format.") \ diff --git a/src/Common/ProxyConfigurationResolverProvider.cpp b/src/Common/ProxyConfigurationResolverProvider.cpp index b06073121e7..a46837bfdb9 100644 --- a/src/Common/ProxyConfigurationResolverProvider.cpp +++ b/src/Common/ProxyConfigurationResolverProvider.cpp @@ -112,9 +112,8 @@ namespace return configuration.has(config_prefix + ".uri"); } - /* - * New syntax requires protocol prefix " or " - * */ + /* New syntax requires protocol prefix " or " + */ std::optional getProtocolPrefix( ProxyConfiguration::Protocol request_protocol, const String & config_prefix, @@ -130,22 +129,18 @@ namespace return protocol_prefix; } - template std::optional calculatePrefixBasedOnSettingsSyntax( + bool new_syntax, ProxyConfiguration::Protocol request_protocol, const String & config_prefix, const Poco::Util::AbstractConfiguration & configuration ) { if (!configuration.has(config_prefix)) - { return std::nullopt; - } - if constexpr (new_syntax) - { + if (new_syntax) return getProtocolPrefix(request_protocol, config_prefix, configuration); - } return config_prefix; } @@ -155,24 +150,21 @@ std::shared_ptr ProxyConfigurationResolverProvider:: Protocol request_protocol, const Poco::Util::AbstractConfiguration & configuration) { - if (auto resolver = getFromSettings(request_protocol, "proxy", configuration)) - { + if (auto resolver = getFromSettings(true, request_protocol, "proxy", configuration)) return resolver; - } return std::make_shared( request_protocol, isTunnelingDisabledForHTTPSRequestsOverHTTPProxy(configuration)); } -template std::shared_ptr ProxyConfigurationResolverProvider::getFromSettings( + bool new_syntax, Protocol request_protocol, const String & config_prefix, - const Poco::Util::AbstractConfiguration & configuration -) + const Poco::Util::AbstractConfiguration & configuration) { - auto prefix_opt = calculatePrefixBasedOnSettingsSyntax(request_protocol, config_prefix, configuration); + auto prefix_opt = calculatePrefixBasedOnSettingsSyntax(new_syntax, request_protocol, config_prefix, configuration); if (!prefix_opt) { @@ -195,20 +187,17 @@ std::shared_ptr ProxyConfigurationResolverProvider:: std::shared_ptr ProxyConfigurationResolverProvider::getFromOldSettingsFormat( Protocol request_protocol, const String & config_prefix, - const Poco::Util::AbstractConfiguration & configuration -) + const Poco::Util::AbstractConfiguration & configuration) { - /* - * First try to get it from settings only using the combination of config_prefix and configuration. + /* First try to get it from settings only using the combination of config_prefix and configuration. * This logic exists for backward compatibility with old S3 storage specific proxy configuration. * */ - if (auto resolver = ProxyConfigurationResolverProvider::getFromSettings(request_protocol, config_prefix + ".proxy", configuration)) + if (auto resolver = ProxyConfigurationResolverProvider::getFromSettings(false, request_protocol, config_prefix + ".proxy", configuration)) { return resolver; } - /* - * In case the combination of config_prefix and configuration does not provide a resolver, try to get it from general / new settings. + /* In case the combination of config_prefix and configuration does not provide a resolver, try to get it from general / new settings. * Falls back to Environment resolver if no configuration is found. * */ return ProxyConfigurationResolverProvider::get(request_protocol, configuration); diff --git a/src/Common/ProxyConfigurationResolverProvider.h b/src/Common/ProxyConfigurationResolverProvider.h index ebf22f7e92a..357b218e499 100644 --- a/src/Common/ProxyConfigurationResolverProvider.h +++ b/src/Common/ProxyConfigurationResolverProvider.h @@ -33,12 +33,11 @@ public: ); private: - template static std::shared_ptr getFromSettings( + bool is_new_syntax, Protocol protocol, const String & config_prefix, - const Poco::Util::AbstractConfiguration & configuration - ); + const Poco::Util::AbstractConfiguration & configuration); }; } diff --git a/src/Client/QueryFuzzer.cpp b/src/Common/QueryFuzzer.cpp similarity index 95% rename from src/Client/QueryFuzzer.cpp rename to src/Common/QueryFuzzer.cpp index f5b700ea529..0b2f6c09b45 100644 --- a/src/Client/QueryFuzzer.cpp +++ b/src/Common/QueryFuzzer.cpp @@ -68,22 +68,21 @@ Field QueryFuzzer::getRandomField(int type) { case 0: { - return bad_int64_values[fuzz_rand() % (sizeof(bad_int64_values) - / sizeof(*bad_int64_values))]; + return bad_int64_values[fuzz_rand() % std::size(bad_int64_values)]; } case 1: { static constexpr double values[] = {NAN, INFINITY, -INFINITY, 0., -0., 0.0001, 0.5, 0.9999, 1., 1.0001, 2., 10.0001, 100.0001, 1000.0001, 1e10, 1e20, - FLT_MIN, FLT_MIN + FLT_EPSILON, FLT_MAX, FLT_MAX + FLT_EPSILON}; return values[fuzz_rand() % (sizeof(values) / sizeof(*values))]; + FLT_MIN, FLT_MIN + FLT_EPSILON, FLT_MAX, FLT_MAX + FLT_EPSILON}; return values[fuzz_rand() % std::size(values)]; } case 2: { static constexpr UInt64 scales[] = {0, 1, 2, 10}; return DecimalField( - bad_int64_values[fuzz_rand() % (sizeof(bad_int64_values) / sizeof(*bad_int64_values))], - static_cast(scales[fuzz_rand() % (sizeof(scales) / sizeof(*scales))]) + bad_int64_values[fuzz_rand() % std::size(bad_int64_values)], + static_cast(scales[fuzz_rand() % std::size(scales)]) ); } default: @@ -133,7 +132,7 @@ Field QueryFuzzer::fuzzField(Field field) if (type == Field::Types::String) { - auto & str = field.get(); + auto & str = field.safeGet(); UInt64 action = fuzz_rand() % 10; switch (action) { @@ -159,13 +158,14 @@ Field QueryFuzzer::fuzzField(Field field) } else if (type == Field::Types::Array) { - auto & arr = field.get(); + auto & arr = field.safeGet(); if (fuzz_rand() % 5 == 0 && !arr.empty()) { size_t pos = fuzz_rand() % arr.size(); arr.erase(arr.begin() + pos); - std::cerr << "erased\n"; + if (debug_stream) + *debug_stream << "erased\n"; } if (fuzz_rand() % 5 == 0) @@ -174,12 +174,14 @@ Field QueryFuzzer::fuzzField(Field field) { size_t pos = fuzz_rand() % arr.size(); arr.insert(arr.begin() + pos, fuzzField(arr[pos])); - std::cerr << fmt::format("inserted (pos {})\n", pos); + if (debug_stream) + *debug_stream << fmt::format("inserted (pos {})\n", pos); } else { arr.insert(arr.begin(), getRandomField(0)); - std::cerr << "inserted (0)\n"; + if (debug_stream) + *debug_stream << "inserted (0)\n"; } } @@ -191,13 +193,15 @@ Field QueryFuzzer::fuzzField(Field field) } else if (type == Field::Types::Tuple) { - auto & arr = field.get(); + auto & arr = field.safeGet(); if (fuzz_rand() % 5 == 0 && !arr.empty()) { size_t pos = fuzz_rand() % arr.size(); arr.erase(arr.begin() + pos); - std::cerr << "erased\n"; + + if (debug_stream) + *debug_stream << "erased\n"; } if (fuzz_rand() % 5 == 0) @@ -206,12 +210,16 @@ Field QueryFuzzer::fuzzField(Field field) { size_t pos = fuzz_rand() % arr.size(); arr.insert(arr.begin() + pos, fuzzField(arr[pos])); - std::cerr << fmt::format("inserted (pos {})\n", pos); + + if (debug_stream) + *debug_stream << fmt::format("inserted (pos {})\n", pos); } else { arr.insert(arr.begin(), getRandomField(0)); - std::cerr << "inserted (0)\n"; + + if (debug_stream) + *debug_stream << "inserted (0)\n"; } } @@ -344,7 +352,8 @@ void QueryFuzzer::fuzzOrderByList(IAST * ast) } else { - std::cerr << "No random column.\n"; + if (debug_stream) + *debug_stream << "No random column.\n"; } } @@ -378,7 +387,8 @@ void QueryFuzzer::fuzzColumnLikeExpressionList(IAST * ast) if (col) impl->children.insert(pos, col); else - std::cerr << "No random column.\n"; + if (debug_stream) + *debug_stream << "No random column.\n"; } // We don't have to recurse here to fuzz the children, this is handled by @@ -912,17 +922,17 @@ ASTPtr QueryFuzzer::fuzzLiteralUnderExpressionList(ASTPtr child) auto type = l->value.getType(); if (type == Field::Types::Which::String && fuzz_rand() % 7 == 0) { - String value = l->value.get(); + String value = l->value.safeGet(); child = makeASTFunction( "toFixedString", std::make_shared(value), std::make_shared(static_cast(value.size()))); } else if (type == Field::Types::Which::UInt64 && fuzz_rand() % 7 == 0) { - child = makeASTFunction(fuzz_rand() % 2 == 0 ? "toUInt128" : "toUInt256", std::make_shared(l->value.get())); + child = makeASTFunction(fuzz_rand() % 2 == 0 ? "toUInt128" : "toUInt256", std::make_shared(l->value.safeGet())); } else if (type == Field::Types::Which::Int64 && fuzz_rand() % 7 == 0) { - child = makeASTFunction(fuzz_rand() % 2 == 0 ? "toInt128" : "toInt256", std::make_shared(l->value.get())); + child = makeASTFunction(fuzz_rand() % 2 == 0 ? "toInt128" : "toInt256", std::make_shared(l->value.safeGet())); } else if (type == Field::Types::Which::Float64 && fuzz_rand() % 7 == 0) { @@ -930,22 +940,22 @@ ASTPtr QueryFuzzer::fuzzLiteralUnderExpressionList(ASTPtr child) if (decimal == 0) child = makeASTFunction( "toDecimal32", - std::make_shared(l->value.get()), + std::make_shared(l->value.safeGet()), std::make_shared(static_cast(fuzz_rand() % 9))); else if (decimal == 1) child = makeASTFunction( "toDecimal64", - std::make_shared(l->value.get()), + std::make_shared(l->value.safeGet()), std::make_shared(static_cast(fuzz_rand() % 18))); else if (decimal == 2) child = makeASTFunction( "toDecimal128", - std::make_shared(l->value.get()), + std::make_shared(l->value.safeGet()), std::make_shared(static_cast(fuzz_rand() % 38))); else child = makeASTFunction( "toDecimal256", - std::make_shared(l->value.get()), + std::make_shared(l->value.safeGet()), std::make_shared(static_cast(fuzz_rand() % 76))); } @@ -1361,11 +1371,15 @@ void QueryFuzzer::fuzzMain(ASTPtr & ast) collectFuzzInfoMain(ast); fuzz(ast); - std::cout << std::endl; - WriteBufferFromOStream ast_buf(std::cout, 4096); - formatAST(*ast, ast_buf, false /*highlight*/); - ast_buf.finalize(); - std::cout << std::endl << std::endl; + if (out_stream) + { + *out_stream << std::endl; + + WriteBufferFromOStream ast_buf(*out_stream, 4096); + formatAST(*ast, ast_buf, false /*highlight*/); + ast_buf.finalize(); + *out_stream << std::endl << std::endl; + } } } diff --git a/src/Client/QueryFuzzer.h b/src/Common/QueryFuzzer.h similarity index 91% rename from src/Client/QueryFuzzer.h rename to src/Common/QueryFuzzer.h index 6165e589cae..35d088809f2 100644 --- a/src/Client/QueryFuzzer.h +++ b/src/Common/QueryFuzzer.h @@ -35,9 +35,31 @@ struct ASTWindowDefinition; * queries, so you want to feed it a lot of queries to get some interesting mix * of them. Normally we feed SQL regression tests to it. */ -struct QueryFuzzer +class QueryFuzzer { - pcg64 fuzz_rand{randomSeed()}; +public: + explicit QueryFuzzer(pcg64 fuzz_rand_ = randomSeed(), std::ostream * out_stream_ = nullptr, std::ostream * debug_stream_ = nullptr) + : fuzz_rand(fuzz_rand_) + , out_stream(out_stream_) + , debug_stream(debug_stream_) + { + } + + // This is the only function you have to call -- it will modify the passed + // ASTPtr to point to new AST with some random changes. + void fuzzMain(ASTPtr & ast); + + ASTs getInsertQueriesForFuzzedTables(const String & full_query); + ASTs getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query); + void notifyQueryFailed(ASTPtr ast); + + static bool isSuitableForFuzzing(const ASTCreateQuery & create); + +private: + pcg64 fuzz_rand; + + std::ostream * out_stream = nullptr; + std::ostream * debug_stream = nullptr; // We add elements to expression lists with fixed probability. Some elements // are so large, that the expected number of elements we add to them is @@ -66,10 +88,6 @@ struct QueryFuzzer std::unordered_map index_of_fuzzed_table; std::set created_tables_hashes; - // This is the only function you have to call -- it will modify the passed - // ASTPtr to point to new AST with some random changes. - void fuzzMain(ASTPtr & ast); - // Various helper functions follow, normally you shouldn't have to call them. Field getRandomField(int type); Field fuzzField(Field field); @@ -77,9 +95,6 @@ struct QueryFuzzer ASTPtr getRandomExpressionList(); DataTypePtr fuzzDataType(DataTypePtr type); DataTypePtr getRandomType(); - ASTs getInsertQueriesForFuzzedTables(const String & full_query); - ASTs getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query); - void notifyQueryFailed(ASTPtr ast); void replaceWithColumnLike(ASTPtr & ast); void replaceWithTableLike(ASTPtr & ast); void fuzzOrderByElement(ASTOrderByElement * elem); @@ -102,8 +117,6 @@ struct QueryFuzzer void addTableLike(ASTPtr ast); void addColumnLike(ASTPtr ast); void collectFuzzInfoRecurse(ASTPtr ast); - - static bool isSuitableForFuzzing(const ASTCreateQuery & create); }; } diff --git a/src/Common/QueryProfiler.cpp b/src/Common/QueryProfiler.cpp index 746010b5462..85c92ec292d 100644 --- a/src/Common/QueryProfiler.cpp +++ b/src/Common/QueryProfiler.cpp @@ -110,7 +110,7 @@ namespace errno = saved_errno; } - [[maybe_unused]] constexpr UInt32 TIMER_PRECISION = 1e9; + [[maybe_unused]] constexpr UInt64 TIMER_PRECISION = 1e9; } namespace ErrorCodes @@ -167,18 +167,18 @@ void Timer::createIfNecessary(UInt64 thread_id, int clock_type, int pause_signal } } -void Timer::set(UInt32 period) +void Timer::set(UInt64 period) { /// Too high frequency can introduce infinite busy loop of signal handlers. We will limit maximum frequency (with 1000 signals per second). - period = std::max(period, 1000000); + period = std::max(period, 1000000); /// Randomize offset as uniform random value from 0 to period - 1. /// It will allow to sample short queries even if timer period is large. /// (For example, with period of 1 second, query with 50 ms duration will be sampled with 1 / 20 probability). /// It also helps to avoid interference (moire). - UInt32 period_rand = std::uniform_int_distribution(0, period)(thread_local_rng); + UInt64 period_rand = std::uniform_int_distribution(0, period)(thread_local_rng); - struct timespec interval{.tv_sec = period / TIMER_PRECISION, .tv_nsec = period % TIMER_PRECISION}; - struct timespec offset{.tv_sec = period_rand / TIMER_PRECISION, .tv_nsec = period_rand % TIMER_PRECISION}; + struct timespec interval{.tv_sec = time_t(period / TIMER_PRECISION), .tv_nsec = int64_t(period % TIMER_PRECISION)}; + struct timespec offset{.tv_sec = time_t(period_rand / TIMER_PRECISION), .tv_nsec = int64_t(period_rand % TIMER_PRECISION)}; struct itimerspec timer_spec = {.it_interval = interval, .it_value = offset}; if (timer_settime(*timer_id, 0, &timer_spec, nullptr)) @@ -229,7 +229,7 @@ void Timer::cleanup() template QueryProfilerBase::QueryProfilerBase( - [[maybe_unused]] UInt64 thread_id, [[maybe_unused]] int clock_type, [[maybe_unused]] UInt32 period, [[maybe_unused]] int pause_signal_) + [[maybe_unused]] UInt64 thread_id, [[maybe_unused]] int clock_type, [[maybe_unused]] UInt64 period, [[maybe_unused]] int pause_signal_) : log(getLogger("QueryProfiler")), pause_signal(pause_signal_) { #if defined(SANITIZER) @@ -270,7 +270,7 @@ QueryProfilerBase::QueryProfilerBase( template -void QueryProfilerBase::setPeriod([[maybe_unused]] UInt32 period_) +void QueryProfilerBase::setPeriod([[maybe_unused]] UInt64 period_) { #if defined(SANITIZER) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "QueryProfiler disabled because they cannot work under sanitizers"); @@ -307,7 +307,7 @@ void QueryProfilerBase::cleanup() template class QueryProfilerBase; template class QueryProfilerBase; -QueryProfilerReal::QueryProfilerReal(UInt64 thread_id, UInt32 period) +QueryProfilerReal::QueryProfilerReal(UInt64 thread_id, UInt64 period) : QueryProfilerBase(thread_id, CLOCK_MONOTONIC, period, SIGUSR1) {} @@ -320,7 +320,7 @@ void QueryProfilerReal::signalHandler(int sig, siginfo_t * info, void * context) writeTraceInfo(TraceType::Real, sig, info, context); } -QueryProfilerCPU::QueryProfilerCPU(UInt64 thread_id, UInt32 period) +QueryProfilerCPU::QueryProfilerCPU(UInt64 thread_id, UInt64 period) : QueryProfilerBase(thread_id, CLOCK_THREAD_CPUTIME_ID, period, SIGUSR2) {} diff --git a/src/Common/QueryProfiler.h b/src/Common/QueryProfiler.h index ea4cc73bca6..e3ab0b2e094 100644 --- a/src/Common/QueryProfiler.h +++ b/src/Common/QueryProfiler.h @@ -40,7 +40,7 @@ public: ~Timer(); void createIfNecessary(UInt64 thread_id, int clock_type, int pause_signal); - void set(UInt32 period); + void set(UInt64 period); void stop(); void cleanup(); @@ -54,10 +54,10 @@ template class QueryProfilerBase { public: - QueryProfilerBase(UInt64 thread_id, int clock_type, UInt32 period, int pause_signal_); + QueryProfilerBase(UInt64 thread_id, int clock_type, UInt64 period, int pause_signal_); ~QueryProfilerBase(); - void setPeriod(UInt32 period_); + void setPeriod(UInt64 period_); private: void cleanup(); @@ -76,7 +76,7 @@ private: class QueryProfilerReal : public QueryProfilerBase { public: - QueryProfilerReal(UInt64 thread_id, UInt32 period); /// NOLINT + QueryProfilerReal(UInt64 thread_id, UInt64 period); /// NOLINT static void signalHandler(int sig, siginfo_t * info, void * context); }; @@ -85,7 +85,7 @@ public: class QueryProfilerCPU : public QueryProfilerBase { public: - QueryProfilerCPU(UInt64 thread_id, UInt32 period); /// NOLINT + QueryProfilerCPU(UInt64 thread_id, UInt64 period); /// NOLINT static void signalHandler(int sig, siginfo_t * info, void * context); }; diff --git a/src/Common/SLRUCachePolicy.h b/src/Common/SLRUCachePolicy.h index 354ec1d36d6..5321110f3e5 100644 --- a/src/Common/SLRUCachePolicy.h +++ b/src/Common/SLRUCachePolicy.h @@ -95,6 +95,27 @@ public: cells.erase(it); } + void remove(std::function predicate) override + { + for (auto it = cells.begin(); it != cells.end();) + { + if (predicate(it->first, it->second.value)) + { + auto & cell = it->second; + + current_size_in_bytes -= cell.size; + if (cell.is_protected) + current_protected_size -= cell.size; + + auto & queue = cell.is_protected ? protected_queue : probationary_queue; + queue.erase(cell.queue_iterator); + it = cells.erase(it); + } + else + ++it; + } + } + MappedPtr get(const Key & key) override { auto it = cells.find(key); diff --git a/src/Common/Scheduler/Nodes/DynamicResourceManager.cpp b/src/Common/Scheduler/Nodes/DynamicResourceManager.cpp index 01aa7df48d3..6b9f6318903 100644 --- a/src/Common/Scheduler/Nodes/DynamicResourceManager.cpp +++ b/src/Common/Scheduler/Nodes/DynamicResourceManager.cpp @@ -184,14 +184,20 @@ void DynamicResourceManager::updateConfiguration(const Poco::Util::AbstractConfi // Resource update leads to loss of runtime data of nodes and may lead to temporary violation of constraints (e.g. limits) // Try to minimise this by reusing "equal" resources (initialized with the same configuration). + std::vector resources_to_attach; for (auto & [name, new_resource] : new_state->resources) { if (auto iter = state->resources.find(name); iter != state->resources.end()) // Resource update { State::ResourcePtr old_resource = iter->second; if (old_resource->equals(*new_resource)) + { new_resource = old_resource; // Rewrite with older version to avoid loss of runtime data + continue; + } } + // It is new or updated resource + resources_to_attach.emplace_back(new_resource); } // Commit new state @@ -199,17 +205,14 @@ void DynamicResourceManager::updateConfiguration(const Poco::Util::AbstractConfi state = new_state; // Attach new and updated resources to the scheduler - for (auto & [name, resource] : new_state->resources) + for (auto & resource : resources_to_attach) { const SchedulerNodePtr & root = resource->nodes.find("/")->second.ptr; - if (root->parent == nullptr) + resource->attached_to = &scheduler; + scheduler.event_queue->enqueue([this, root] { - resource->attached_to = &scheduler; - scheduler.event_queue->enqueue([this, root] - { - scheduler.attachChild(root); - }); - } + scheduler.attachChild(root); + }); } // NOTE: after mutex unlock `state` became available for Classifier(s) and must be immutable diff --git a/src/Common/ShellCommand.cpp b/src/Common/ShellCommand.cpp index 98a21b43d76..0d41669816c 100644 --- a/src/Common/ShellCommand.cpp +++ b/src/Common/ShellCommand.cpp @@ -237,7 +237,14 @@ std::unique_ptr ShellCommand::executeImpl( res->write_fds.emplace(fd, fds.fds_rw[1]); } - LOG_TRACE(getLogger(), "Started shell command '{}' with pid {}", filename, pid); + LOG_TRACE( + getLogger(), + "Started shell command '{}' with pid {} and file descriptors: out {}, err {}", + filename, + pid, + res->out.getFD(), + res->err.getFD()); + return res; } diff --git a/src/Common/SignalHandlers.cpp b/src/Common/SignalHandlers.cpp index e025e49e0a3..6ac6cbcae29 100644 --- a/src/Common/SignalHandlers.cpp +++ b/src/Common/SignalHandlers.cpp @@ -18,13 +18,17 @@ namespace DB { + namespace ErrorCodes { extern const int CANNOT_SET_SIGNAL_HANDLER; extern const int CANNOT_SEND_SIGNAL; } + } +extern const char * GIT_HASH; + using namespace DB; @@ -334,7 +338,7 @@ void SignalListener::onTerminate(std::string_view message, UInt32 thread_num) co size_t pos = message.find('\n'); LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) {}", - VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", daemon ? daemon->git_hash : "", thread_num, message.substr(0, pos)); + VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", GIT_HASH, thread_num, message.substr(0, pos)); /// Print trace from std::terminate exception line-by-line to make it easy for grep. while (pos != std::string_view::npos) @@ -368,7 +372,7 @@ try LOG_FATAL(log, "########## Short fault info ############"); LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) Received signal {}", - VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", daemon ? daemon->git_hash : "", + VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", GIT_HASH, thread_num, sig); std::string signal_description = "Unknown signal"; @@ -434,13 +438,13 @@ try if (query_id.empty()) { LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) (no query) Received signal {} ({})", - VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", daemon ? daemon->git_hash : "", + VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", GIT_HASH, thread_num, signal_description, sig); } else { LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) (query_id: {}) (query: {}) Received signal {} ({})", - VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", daemon ? daemon->git_hash : "", + VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", GIT_HASH, thread_num, query_id, query, signal_description, sig); } @@ -629,6 +633,7 @@ void HandledSignals::setupTerminateHandler() void HandledSignals::setupCommonDeadlySignalHandlers() { /// SIGTSTP is added for debugging purposes. To output a stack trace of any running thread at anytime. + /// NOTE: that it is also used by clickhouse-test wrapper addSignalHandler({SIGABRT, SIGSEGV, SIGILL, SIGBUS, SIGSYS, SIGFPE, SIGPIPE, SIGTSTP, SIGTRAP}, signalHandler, true); #if defined(SANITIZER) diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index 76277cbc993..bd01b639913 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -248,8 +248,31 @@ void StackTrace::forEachFrame( auto dwarf_it = dwarfs.try_emplace(object->name, object->elf).first; DB::Dwarf::LocationInfo location; - if (dwarf_it->second.findAddress( - uintptr_t(current_frame.physical_addr), location, mode, inline_frames)) + uintptr_t adjusted_addr = uintptr_t(current_frame.physical_addr); + if (i > 0) + { + /// For non-innermost stack frames, the address points to the *next* instruction + /// after the `call` instruction. But we want the line number and inline function + /// information for the `call` instruction. So subtract 1 from the address. + /// Caveats: + /// * The `call` instruction can be longer than 1 byte, so addr-1 is in the middle + /// of the instruction. That's ok for debug info lookup: address ranges in debug + /// info cover the whole instruction. + /// * If the stack trace unwound out of a signal handler, the stack frame just + /// outside the signal didn't do a function call. It was interrupted by signal. + /// There's no `call` instruction, and decrementing the address is incorrect. + /// We may get incorrect line number and inlined functions in this case. + /// Unfortunate. + /// Note that libunwind, when producing this stack trace, knows whether this + /// frame is interrupted by signal or not. We could propagate this information + /// from libunwind to here and avoid subtracting 1 in this case, but currently + /// we don't do this. + /// But we don't do the decrement for findSymbol below (because `call` is + /// ~never the last instruction of a function), so the function name should be + /// correct for both pre-signal frames and regular frames. + adjusted_addr -= 1; + } + if (dwarf_it->second.findAddress(adjusted_addr, location, mode, inline_frames)) { current_frame.file = location.file.toString(); current_frame.line = location.line; diff --git a/src/Common/StatusFile.cpp b/src/Common/StatusFile.cpp index 80464f38082..0bbb7ff411d 100644 --- a/src/Common/StatusFile.cpp +++ b/src/Common/StatusFile.cpp @@ -51,7 +51,7 @@ StatusFile::StatusFile(std::string path_, FillFunction fill_) std::string contents; { ReadBufferFromFile in(path, 1024); - LimitReadBuffer limit_in(in, 1024, /* trow_exception */ false, /* exact_limit */ {}); + LimitReadBuffer limit_in(in, 1024, /* throw_exception */ false, /* exact_limit */ {}); readStringUntilEOF(contents, limit_in); } diff --git a/src/Common/StringHashForHeterogeneousLookup.h b/src/Common/StringHashForHeterogeneousLookup.h new file mode 100644 index 00000000000..56d8ccf0009 --- /dev/null +++ b/src/Common/StringHashForHeterogeneousLookup.h @@ -0,0 +1,30 @@ +#pragma once +#include + +namespace DB +{ + +/// See https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p0919r3.html +struct StringHashForHeterogeneousLookup +{ + using hash_type = std::hash; + using transparent_key_equal = std::equal_to<>; + using is_transparent = void; // required to make find() work with different type than key_type + + auto operator()(const std::string_view view) const + { + return hash_type()(view); + } + + auto operator()(const std::string & str) const + { + return hash_type()(str); + } + + auto operator()(const char * data) const + { + return hash_type()(data); + } +}; + +} diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp index 7d2c15714e2..45f4eb1c5a6 100644 --- a/src/Common/SystemLogBase.cpp +++ b/src/Common/SystemLogBase.cpp @@ -27,12 +27,14 @@ #include #include + namespace DB { namespace ErrorCodes { extern const int TIMEOUT_EXCEEDED; + extern const int ABORTED; } ISystemLog::~ISystemLog() = default; @@ -86,32 +88,18 @@ void SystemLogQueue::push(LogElement&& element) // by one, under exclusive lock, so we will see each message count. // It is enough to only wake the flushing thread once, after the message // count increases past half available size. - const uint64_t queue_end = queue_front_index + queue.size(); - requested_flush_up_to = std::max(requested_flush_up_to, queue_end); - flush_event.notify_all(); + const auto last_log_index = queue_front_index + queue.size(); + notifyFlushUnlocked(last_log_index, /* should_prepare_tables_anyway */ false); } if (queue.size() >= settings.max_size_rows) { + chassert(queue.size() == settings.max_size_rows); + // Ignore all further entries until the queue is flushed. - // Log a message about that. Don't spam it -- this might be especially - // problematic in case of trace log. Remember what the front index of the - // queue was when we last logged the message. If it changed, it means the - // queue was flushed, and we can log again. - if (queue_front_index != logged_queue_full_at_index) - { - logged_queue_full_at_index = queue_front_index; - - // TextLog sets its logger level to 0, so this log is a noop and - // there is no recursive logging. - lock.unlock(); - LOG_ERROR(log, "Queue is full for system log '{}' at {}. max_size_rows {}", - demangle(typeid(*this).name()), - queue_front_index, - settings.max_size_rows); - } - + // To the next batch we add a log message about how much we have lost + ++ignored_logs; return; } @@ -127,20 +115,50 @@ template void SystemLogQueue::handleCrash() { if (settings.notify_flush_on_crash) - notifyFlush(/* force */ true); + { + notifyFlush(getLastLogIndex(), /* should_prepare_tables_anyway */ true); + } } template -void SystemLogQueue::waitFlush(uint64_t expected_flushed_up_to) +void SystemLogQueue::notifyFlushUnlocked(Index expected_flushed_index, bool should_prepare_tables_anyway) { + if (should_prepare_tables_anyway) + requested_prepare_tables = std::max(requested_prepare_tables, expected_flushed_index); + + requested_flush_index = std::max(requested_flush_index, expected_flushed_index); + + flush_event.notify_all(); +} + +template +void SystemLogQueue::notifyFlush(SystemLogQueue::Index expected_flushed_index, bool should_prepare_tables_anyway) +{ + std::lock_guard lock(mutex); + notifyFlushUnlocked(expected_flushed_index, should_prepare_tables_anyway); +} + +template +void SystemLogQueue::waitFlush(SystemLogQueue::Index expected_flushed_index, bool should_prepare_tables_anyway) +{ + LOG_DEBUG(log, "Requested flush up to offset {}", expected_flushed_index); + // Use an arbitrary timeout to avoid endless waiting. 60s proved to be // too fast for our parallel functional tests, probably because they // heavily load the disk. const int timeout_seconds = 180; + std::unique_lock lock(mutex); - bool result = flush_event.wait_for(lock, std::chrono::seconds(timeout_seconds), [&] + + // there is no obligation to call notifyFlush before waitFlush, than we have to be sure that flush_event has been triggered before we wait the result + notifyFlushUnlocked(expected_flushed_index, should_prepare_tables_anyway); + + auto result = confirm_event.wait_for(lock, std::chrono::seconds(timeout_seconds), [&] { - return flushed_up_to >= expected_flushed_up_to && !is_force_prepare_tables; + if (should_prepare_tables_anyway) + return (flushed_index >= expected_flushed_index && prepared_tables >= requested_prepare_tables) || is_shutdown; + else + return (flushed_index >= expected_flushed_index) || is_shutdown; }); if (!result) @@ -148,67 +166,63 @@ void SystemLogQueue::waitFlush(uint64_t expected_flushed_up_to) throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Timeout exceeded ({} s) while flushing system log '{}'.", toString(timeout_seconds), demangle(typeid(*this).name())); } -} - -template -uint64_t SystemLogQueue::notifyFlush(bool should_prepare_tables_anyway) -{ - uint64_t this_thread_requested_offset; + if (is_shutdown) { - std::lock_guard lock(mutex); - if (is_shutdown) - return uint64_t(-1); - - this_thread_requested_offset = queue_front_index + queue.size(); - - // Publish our flush request, taking care not to overwrite the requests - // made by other threads. - is_force_prepare_tables |= should_prepare_tables_anyway; - requested_flush_up_to = std::max(requested_flush_up_to, this_thread_requested_offset); - - flush_event.notify_all(); + throw Exception(ErrorCodes::ABORTED, "Shutdown has been called while flushing system log '{}'. Aborting.", + demangle(typeid(*this).name())); } - - LOG_DEBUG(log, "Requested flush up to offset {}", this_thread_requested_offset); - return this_thread_requested_offset; } template -void SystemLogQueue::confirm(uint64_t to_flush_end) +SystemLogQueue::Index SystemLogQueue::getLastLogIndex() { std::lock_guard lock(mutex); - flushed_up_to = to_flush_end; - is_force_prepare_tables = false; - flush_event.notify_all(); + return queue_front_index + queue.size(); } template -typename SystemLogQueue::Index SystemLogQueue::pop(std::vector & output, - bool & should_prepare_tables_anyway, - bool & exit_this_thread) +void SystemLogQueue::confirm(SystemLogQueue::Index last_flashed_index) { - /// Call dtors and deallocate strings without holding the global lock - output.resize(0); + std::lock_guard lock(mutex); + prepared_tables = std::max(prepared_tables, last_flashed_index); + flushed_index = std::max(flushed_index, last_flashed_index); + confirm_event.notify_all(); +} - std::unique_lock lock(mutex); - flush_event.wait_for(lock, - std::chrono::milliseconds(settings.flush_interval_milliseconds), - [&] () +template +typename SystemLogQueue::PopResult SystemLogQueue::pop() +{ + PopResult result; + size_t prev_ignored_logs = 0; + + { + std::unique_lock lock(mutex); + + flush_event.wait_for(lock, std::chrono::milliseconds(settings.flush_interval_milliseconds), [&] () { - return requested_flush_up_to > flushed_up_to || is_shutdown || is_force_prepare_tables; - } - ); + return requested_flush_index > flushed_index || requested_prepare_tables > prepared_tables || is_shutdown; + }); - queue_front_index += queue.size(); - // Swap with existing array from previous flush, to save memory - // allocations. - queue.swap(output); + if (is_shutdown) + return PopResult{.is_shutdown = true}; - should_prepare_tables_anyway = is_force_prepare_tables; + queue_front_index += queue.size(); + prev_ignored_logs = ignored_logs; + ignored_logs = 0; - exit_this_thread = is_shutdown; - return queue_front_index; + result.last_log_index = queue_front_index; + result.logs.swap(queue); + result.create_table_force = requested_prepare_tables > prepared_tables; + } + + if (prev_ignored_logs) + LOG_ERROR(log, "Queue had been full at {}, accepted {} logs, ignored {} logs.", + result.last_log_index - result.logs.size(), + result.logs.size(), + prev_ignored_logs); + + return result; } template @@ -229,13 +243,21 @@ SystemLogBase::SystemLogBase( } template -void SystemLogBase::flush(bool force) +SystemLogBase::Index SystemLogBase::getLastLogIndex() { - uint64_t this_thread_requested_offset = queue->notifyFlush(force); - if (this_thread_requested_offset == uint64_t(-1)) - return; + return queue->getLastLogIndex(); +} - queue->waitFlush(this_thread_requested_offset); +template +void SystemLogBase::notifyFlush(Index expected_flushed_index, bool should_prepare_tables_anyway) +{ + queue->notifyFlush(expected_flushed_index, should_prepare_tables_anyway); +} + +template +void SystemLogBase::flush(Index expected_flushed_index, bool should_prepare_tables_anyway) +{ + queue->waitFlush(expected_flushed_index, should_prepare_tables_anyway); } template @@ -251,15 +273,31 @@ void SystemLogBase::startup() saving_thread = std::make_unique([this] { savingThreadFunction(); }); } +template +void SystemLogBase::stopFlushThread() +{ + { + std::lock_guard lock(thread_mutex); + + if (!saving_thread || !saving_thread->joinable()) + return; + + if (is_shutdown) + return; + + is_shutdown = true; + queue->shutdown(); + } + + saving_thread->join(); +} + template void SystemLogBase::add(LogElement element) { queue->push(std::move(element)); } -template -void SystemLogBase::notifyFlush(bool force) { queue->notifyFlush(force); } - #define INSTANTIATE_SYSTEM_LOG_BASE(ELEMENT) template class SystemLogBase; SYSTEM_LOG_ELEMENTS(INSTANTIATE_SYSTEM_LOG_BASE) diff --git a/src/Common/SystemLogBase.h b/src/Common/SystemLogBase.h index b87fcf419d3..0942e920a42 100644 --- a/src/Common/SystemLogBase.h +++ b/src/Common/SystemLogBase.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -54,10 +55,19 @@ struct StorageID; class ISystemLog { public: + using Index = int64_t; + virtual String getName() const = 0; - //// force -- force table creation (used for SYSTEM FLUSH LOGS) - virtual void flush(bool force = false) = 0; /// NOLINT + /// Return the index of the latest added log element. That index no less than the flashed index. + /// The flashed index is the index of the last log element which has been flushed successfully. + /// Thereby all the records whose index is less than the flashed index are flushed already. + virtual Index getLastLogIndex() = 0; + /// Call this method to wake up the flush thread and flush the data in the background. It is non blocking call + virtual void notifyFlush(Index expected_flushed_index, bool should_prepare_tables_anyway) = 0; + /// Call this method to wait intill the logs are flushed up to expected_flushed_index. It is blocking call. + virtual void flush(Index expected_flushed_index, bool should_prepare_tables_anyway) = 0; + virtual void prepareTable() = 0; /// Start the background thread. @@ -97,26 +107,38 @@ struct SystemLogQueueSettings template class SystemLogQueue { - using Index = uint64_t; - public: + using Index = ISystemLog::Index; + explicit SystemLogQueue(const SystemLogQueueSettings & settings_); void shutdown(); // producer methods void push(LogElement && element); - Index notifyFlush(bool should_prepare_tables_anyway); - void waitFlush(Index expected_flushed_up_to); + + Index getLastLogIndex(); + void notifyFlush(Index expected_flushed_index, bool should_prepare_tables_anyway); + void waitFlush(Index expected_flushed_index, bool should_prepare_tables_anyway); /// Handles crash, flushes log without blocking if notify_flush_on_crash is set void handleCrash(); + struct PopResult + { + Index last_log_index = 0; + std::vector logs = {}; + bool create_table_force = false; + bool is_shutdown = false; + }; + // consumer methods - Index pop(std::vector& output, bool & should_prepare_tables_anyway, bool & exit_this_thread); - void confirm(Index to_flush_end); + PopResult pop(); + void confirm(Index last_flashed_index); private: + void notifyFlushUnlocked(Index expected_flushed_index, bool should_prepare_tables_anyway); + /// Data shared between callers of add()/flush()/shutdown(), and the saving thread std::mutex mutex; @@ -124,22 +146,32 @@ private: // Queue is bounded. But its size is quite large to not block in all normal cases. std::vector queue; + // An always-incrementing index of the first message currently in the queue. // We use it to give a global sequential index to every message, so that we // can wait until a particular message is flushed. This is used to implement // synchronous log flushing for SYSTEM FLUSH LOGS. Index queue_front_index = 0; - // A flag that says we must create the tables even if the queue is empty. - bool is_force_prepare_tables = false; + // Requested to flush logs up to this index, exclusive - Index requested_flush_up_to = 0; + Index requested_flush_index = std::numeric_limits::min(); // Flushed log up to this index, exclusive - Index flushed_up_to = 0; - // Logged overflow message at this queue front index - Index logged_queue_full_at_index = -1; + Index flushed_index = 0; + + // The same logic for the prepare tables: if requested_prepar_tables > prepared_tables we need to do prepare + // except that initial prepared_tables is -1 + // it is due to the difference: when no logs have been written and we call flush logs + // it becomes in the state: requested_flush_index = 0 and flushed_index = 0 -- we do not want to do anything + // but if we need to prepare tables it becomes requested_prepare_tables = 0 and prepared_tables = -1 + // we trigger background thread and do prepare + Index requested_prepare_tables = std::numeric_limits::min(); + Index prepared_tables = -1; + + size_t ignored_logs = 0; bool is_shutdown = false; + std::condition_variable confirm_event; std::condition_variable flush_event; const SystemLogQueueSettings settings; @@ -150,6 +182,7 @@ template class SystemLogBase : public ISystemLog { public: + using Index = ISystemLog::Index; using Self = SystemLogBase; explicit SystemLogBase( @@ -163,15 +196,16 @@ public: */ void add(LogElement element); + Index getLastLogIndex() override; + + void notifyFlush(Index expected_flushed_index, bool should_prepare_tables_anyway) override; + /// Flush data in the buffer to disk. Block the thread until the data is stored on disk. - void flush(bool force) override; + void flush(Index expected_flushed_index, bool should_prepare_tables_anyway) override; /// Handles crash, flushes log without blocking if notify_flush_on_crash is set void handleCrash() override; - /// Non-blocking flush data in the buffer to disk. - void notifyFlush(bool force); - String getName() const override { return LogElement::name(); } static const char * getDefaultOrderBy() { return "event_date, event_time"; } @@ -182,6 +216,8 @@ public: static consteval bool shouldTurnOffLogger() { return false; } protected: + void stopFlushThread() final; + std::shared_ptr> queue; }; } diff --git a/src/Common/TTLCachePolicy.h b/src/Common/TTLCachePolicy.h index 6401835b0d7..6c548e5042b 100644 --- a/src/Common/TTLCachePolicy.h +++ b/src/Common/TTLCachePolicy.h @@ -145,6 +145,23 @@ public: size_in_bytes -= sz; } + void remove(std::function predicate) override + { + for (auto it = cache.begin(); it != cache.end();) + { + if (predicate(it->first, it->second)) + { + size_t sz = weight_function(*it->second); + if (it->first.user_id.has_value()) + Base::user_quotas->decreaseActual(*it->first.user_id, sz); + it = cache.erase(it); + size_in_bytes -= sz; + } + else + ++it; + } + } + MappedPtr get(const Key & key) override { auto it = cache.find(key); diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index 064ac2261ec..1a9ed4f1ee7 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -1570,7 +1570,7 @@ size_t getFailedOpIndex(Coordination::Error exception_code, const Coordination:: KeeperMultiException::KeeperMultiException(Coordination::Error exception_code, size_t failed_op_index_, const Coordination::Requests & requests_, const Coordination::Responses & responses_) - : KeeperException(exception_code, "Transaction failed: Op #{}, path", failed_op_index_), + : KeeperException(exception_code, "Transaction failed ({}): Op #{}, path", exception_code, failed_op_index_), requests(requests_), responses(responses_), failed_op_index(failed_op_index_) { addMessage(getPathForFirstFailedOp()); diff --git a/src/Common/ZooKeeper/ZooKeeper.h b/src/Common/ZooKeeper/ZooKeeper.h index 657c9cb2c03..7ccdc9d1b7f 100644 --- a/src/Common/ZooKeeper/ZooKeeper.h +++ b/src/Common/ZooKeeper/ZooKeeper.h @@ -44,7 +44,7 @@ namespace ErrorCodes namespace zkutil { -/// Preferred size of multi() command (in number of ops) +/// Preferred size of multi command (in the number of operations) constexpr size_t MULTI_BATCH_SIZE = 100; struct ShuffleHost diff --git a/src/Common/config.h.in b/src/Common/config.h.in index 6a0090130a3..2e3b8d84366 100644 --- a/src/Common/config.h.in +++ b/src/Common/config.h.in @@ -32,6 +32,8 @@ #cmakedefine01 USE_IDNA #cmakedefine01 USE_NLP #cmakedefine01 USE_VECTORSCAN +#cmakedefine01 USE_QPL +#cmakedefine01 USE_QATLIB #cmakedefine01 USE_LIBURING #cmakedefine01 USE_AVRO #cmakedefine01 USE_CAPNP @@ -56,10 +58,11 @@ #cmakedefine01 USE_FILELOG #cmakedefine01 USE_ODBC #cmakedefine01 USE_BLAKE3 +#cmakedefine01 USE_USEARCH #cmakedefine01 USE_SKIM #cmakedefine01 USE_PRQL #cmakedefine01 USE_ULID -#cmakedefine01 FIU_ENABLE +#cmakedefine01 USE_LIBFIU #cmakedefine01 USE_BCRYPT #cmakedefine01 USE_LIBARCHIVE #cmakedefine01 USE_POCKETFFT diff --git a/src/Common/examples/CMakeLists.txt b/src/Common/examples/CMakeLists.txt index 69580d4ad0e..8383e80d09d 100644 --- a/src/Common/examples/CMakeLists.txt +++ b/src/Common/examples/CMakeLists.txt @@ -92,3 +92,8 @@ endif() clickhouse_add_executable (check_pointer_valid check_pointer_valid.cpp) target_link_libraries (check_pointer_valid PRIVATE clickhouse_common_io clickhouse_common_config) + +if (TARGET ch_contrib::icu) + clickhouse_add_executable (utf8_upper_lower utf8_upper_lower.cpp) + target_link_libraries (utf8_upper_lower PRIVATE ch_contrib::icu) +endif () diff --git a/src/Common/examples/arena_with_free_lists.cpp b/src/Common/examples/arena_with_free_lists.cpp index 6793d567aca..3a1304e2d94 100644 --- a/src/Common/examples/arena_with_free_lists.cpp +++ b/src/Common/examples/arena_with_free_lists.cpp @@ -174,19 +174,19 @@ struct Dictionary { switch (attribute.type) { - case AttributeUnderlyingTypeTest::UInt8: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingTypeTest::UInt16: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingTypeTest::UInt32: std::get>(attribute.arrays)[idx] = static_cast(value.get()); break; - case AttributeUnderlyingTypeTest::UInt64: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingTypeTest::Int8: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingTypeTest::Int16: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingTypeTest::Int32: std::get>(attribute.arrays)[idx] = static_cast(value.get()); break; - case AttributeUnderlyingTypeTest::Int64: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingTypeTest::Float32: std::get>(attribute.arrays)[idx] = static_cast(value.get()); break; - case AttributeUnderlyingTypeTest::Float64: std::get>(attribute.arrays)[idx] = value.get(); break; + case AttributeUnderlyingTypeTest::UInt8: std::get>(attribute.arrays)[idx] = value.safeGet(); break; + case AttributeUnderlyingTypeTest::UInt16: std::get>(attribute.arrays)[idx] = value.safeGet(); break; + case AttributeUnderlyingTypeTest::UInt32: std::get>(attribute.arrays)[idx] = static_cast(value.safeGet()); break; + case AttributeUnderlyingTypeTest::UInt64: std::get>(attribute.arrays)[idx] = value.safeGet(); break; + case AttributeUnderlyingTypeTest::Int8: std::get>(attribute.arrays)[idx] = value.safeGet(); break; + case AttributeUnderlyingTypeTest::Int16: std::get>(attribute.arrays)[idx] = value.safeGet(); break; + case AttributeUnderlyingTypeTest::Int32: std::get>(attribute.arrays)[idx] = static_cast(value.safeGet()); break; + case AttributeUnderlyingTypeTest::Int64: std::get>(attribute.arrays)[idx] = value.safeGet(); break; + case AttributeUnderlyingTypeTest::Float32: std::get>(attribute.arrays)[idx] = static_cast(value.safeGet()); break; + case AttributeUnderlyingTypeTest::Float64: std::get>(attribute.arrays)[idx] = value.safeGet(); break; case AttributeUnderlyingTypeTest::String: { - const auto & string = value.get(); + const auto & string = value.safeGet(); auto & string_ref = std::get>(attribute.arrays)[idx]; const auto & null_value_ref = std::get(attribute.null_values); diff --git a/src/Common/examples/utf8_upper_lower.cpp b/src/Common/examples/utf8_upper_lower.cpp new file mode 100644 index 00000000000..826e1763105 --- /dev/null +++ b/src/Common/examples/utf8_upper_lower.cpp @@ -0,0 +1,27 @@ +#include +#include + +std::string utf8_to_lower(const std::string & input) +{ + icu::UnicodeString unicodeInput(input.c_str(), "UTF-8"); + unicodeInput.toLower(); + std::string output; + unicodeInput.toUTF8String(output); + return output; +} + +std::string utf8_to_upper(const std::string & input) +{ + icu::UnicodeString unicodeInput(input.c_str(), "UTF-8"); + unicodeInput.toUpper(); + std::string output; + unicodeInput.toUTF8String(output); + return output; +} + +int main() +{ + std::string input = "ır"; + std::cout << "upper:" << utf8_to_upper(input) << std::endl; + return 0; +} diff --git a/src/Common/getNumberOfPhysicalCPUCores.h b/src/Common/getNumberOfPhysicalCPUCores.h index 827e95e1bea..9e3412fdcba 100644 --- a/src/Common/getNumberOfPhysicalCPUCores.h +++ b/src/Common/getNumberOfPhysicalCPUCores.h @@ -1,4 +1,5 @@ #pragma once /// Get number of CPU cores without hyper-threading. +/// The calculation respects possible cgroups limits. unsigned getNumberOfPhysicalCPUCores(); diff --git a/src/Common/parseRemoteDescription.cpp b/src/Common/parseRemoteDescription.cpp index df3820b11f9..6a53098362d 100644 --- a/src/Common/parseRemoteDescription.cpp +++ b/src/Common/parseRemoteDescription.cpp @@ -79,11 +79,16 @@ std::vector parseRemoteDescription( /// Look for the corresponding closing bracket for (m = i + 1; m < r; ++m) { - if (description[m] == '{') ++cnt; - if (description[m] == '}') --cnt; - if (description[m] == '.' && description[m-1] == '.') last_dot = m; - if (description[m] == separator) have_splitter = true; - if (cnt == 0) break; + if (description[m] == '{') + ++cnt; + if (description[m] == '}') + --cnt; + if (description[m] == '.' && description[m-1] == '.') + last_dot = m; + if (description[m] == separator) + have_splitter = true; + if (cnt == 0) + break; } if (cnt != 0) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table function '{}': incorrect brace sequence in first argument", func_name); diff --git a/src/Common/tests/gtest_optimize_re.cpp b/src/Common/tests/gtest_optimize_re.cpp index a9fcb918b24..d6735c3ccfe 100644 --- a/src/Common/tests/gtest_optimize_re.cpp +++ b/src/Common/tests/gtest_optimize_re.cpp @@ -19,6 +19,9 @@ TEST(OptimizeRE, analyze) }; test_f("abc", "abc", {}, true, true); test_f("c([^k]*)de", ""); + test_f("(?-s)bob", "bob", {}, false, true); + test_f("(?s)bob", "bob", {}, false, true); + test_f("(?ssss", ""); test_f("abc(de)fg", "abcdefg", {}, false, true); test_f("abc(de|xyz)fg", "abc", {"abcdefg", "abcxyzfg"}, false, true); test_f("abc(de?f|xyz)fg", "abc", {"abcd", "abcxyzfg"}, false, true); diff --git a/src/Compression/CompressionCodecDeflateQpl.cpp b/src/Compression/CompressionCodecDeflateQpl.cpp index f1b5b24e866..c82ee861a6f 100644 --- a/src/Compression/CompressionCodecDeflateQpl.cpp +++ b/src/Compression/CompressionCodecDeflateQpl.cpp @@ -1,7 +1,3 @@ -#ifdef ENABLE_QPL_COMPRESSION - -#include -#include #include #include #include @@ -11,6 +7,10 @@ #include #include #include +#include +#include + +#if USE_QPL #include "libaccel_config.h" diff --git a/src/Compression/CompressionCodecDeflateQpl.h b/src/Compression/CompressionCodecDeflateQpl.h index 86fd9051bd8..d9abc0fb7e0 100644 --- a/src/Compression/CompressionCodecDeflateQpl.h +++ b/src/Compression/CompressionCodecDeflateQpl.h @@ -4,6 +4,11 @@ #include #include #include + +#include "config.h" + +#if USE_QPL + #include namespace Poco @@ -117,3 +122,4 @@ private: }; } +#endif diff --git a/src/Compression/CompressionCodecZSTDQAT.cpp b/src/Compression/CompressionCodecZSTDQAT.cpp index 5a4ef70a30a..e19b7e4a001 100644 --- a/src/Compression/CompressionCodecZSTDQAT.cpp +++ b/src/Compression/CompressionCodecZSTDQAT.cpp @@ -1,4 +1,6 @@ -#ifdef ENABLE_ZSTD_QAT_CODEC +#include "config.h" + +#if USE_QATLIB #include #include @@ -6,6 +8,7 @@ #include #include #include + #include #include diff --git a/src/Compression/CompressionFactory.cpp b/src/Compression/CompressionFactory.cpp index 2e7aa0d086f..ac00f571568 100644 --- a/src/Compression/CompressionFactory.cpp +++ b/src/Compression/CompressionFactory.cpp @@ -1,20 +1,20 @@ -#include "config.h" - #include +#include +#include +#include +#include #include #include #include -#include -#include -#include -#include #include -#include -#include -#include +#include +#include +#include #include +#include "config.h" + namespace DB { @@ -175,11 +175,11 @@ void registerCodecNone(CompressionCodecFactory & factory); void registerCodecLZ4(CompressionCodecFactory & factory); void registerCodecLZ4HC(CompressionCodecFactory & factory); void registerCodecZSTD(CompressionCodecFactory & factory); -#ifdef ENABLE_ZSTD_QAT_CODEC +#if USE_QATLIB void registerCodecZSTDQAT(CompressionCodecFactory & factory); #endif void registerCodecMultiple(CompressionCodecFactory & factory); -#ifdef ENABLE_QPL_COMPRESSION +#if USE_QPL void registerCodecDeflateQpl(CompressionCodecFactory & factory); #endif @@ -198,7 +198,7 @@ CompressionCodecFactory::CompressionCodecFactory() registerCodecNone(*this); registerCodecLZ4(*this); registerCodecZSTD(*this); -#ifdef ENABLE_ZSTD_QAT_CODEC +#if USE_QATLIB registerCodecZSTDQAT(*this); #endif registerCodecLZ4HC(*this); @@ -209,7 +209,7 @@ CompressionCodecFactory::CompressionCodecFactory() registerCodecGorilla(*this); registerCodecEncrypted(*this); registerCodecFPC(*this); -#ifdef ENABLE_QPL_COMPRESSION +#if USE_QPL registerCodecDeflateQpl(*this); #endif registerCodecGCD(*this); diff --git a/src/Coordination/KeeperSnapshotManager.cpp b/src/Coordination/KeeperSnapshotManager.cpp index 3f5ac055470..2ed89c414ff 100644 --- a/src/Coordination/KeeperSnapshotManager.cpp +++ b/src/Coordination/KeeperSnapshotManager.cpp @@ -54,7 +54,7 @@ namespace std::filesystem::path path(snapshot_path); std::string filename = path.stem(); Strings name_parts; - splitInto<'_'>(name_parts, filename); + splitInto<'_', '.'>(name_parts, filename); return parse(name_parts[1]); } diff --git a/src/Coordination/RaftServerConfig.cpp b/src/Coordination/RaftServerConfig.cpp index 929eeeb640e..bafc177b736 100644 --- a/src/Coordination/RaftServerConfig.cpp +++ b/src/Coordination/RaftServerConfig.cpp @@ -26,12 +26,16 @@ std::optional RaftServerConfig::parse(std::string_view server) if (!with_id_endpoint && !with_server_type && !with_priority) return std::nullopt; - const std::string_view id_str = parts[0]; + std::string_view id_str = parts[0]; if (!id_str.starts_with("server.")) return std::nullopt; + id_str = id_str.substr(7); + if (auto eq_pos = id_str.find('='); std::string_view::npos != eq_pos) + id_str = id_str.substr(0, eq_pos); + Int32 id; - if (!tryParse(id, std::next(id_str.begin(), 7))) + if (!tryParse(id, id_str)) return std::nullopt; if (id <= 0) return std::nullopt; diff --git a/src/Core/ExternalTable.cpp b/src/Core/ExternalTable.cpp index af681cd5639..4ff0d7092d8 100644 --- a/src/Core/ExternalTable.cpp +++ b/src/Core/ExternalTable.cpp @@ -49,7 +49,7 @@ ExternalTableDataPtr BaseExternalTable::getData(ContextPtr context) { initReadBuffer(); initSampleBlock(); - auto input = context->getInputFormat(format, *read_buffer, sample_block, context->getSettingsRef().get("max_block_size").get()); + auto input = context->getInputFormat(format, *read_buffer, sample_block, context->getSettingsRef().get("max_block_size").safeGet()); auto data = std::make_unique(); data->pipe = std::make_unique(); @@ -85,7 +85,7 @@ void BaseExternalTable::parseStructureFromStructureField(const std::string & arg /// We use `formatWithPossiblyHidingSensitiveData` instead of `getColumnNameWithoutAlias` because `column->type` is an ASTFunction. /// `getColumnNameWithoutAlias` will return name of the function with `(arguments)` even if arguments is empty. if (column) - structure.emplace_back(column->name, column->type->formatWithPossiblyHidingSensitiveData(0, true, true)); + structure.emplace_back(column->name, column->type->formatWithPossiblyHidingSensitiveData(0, true, true, false)); else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Error while parsing table structure: expected column definition, got {}", child->formatForErrorMessage()); } @@ -102,7 +102,7 @@ void BaseExternalTable::parseStructureFromTypesField(const std::string & argumen throw Exception(ErrorCodes::BAD_ARGUMENTS, "Error while parsing table structure: {}", error); for (size_t i = 0; i < type_list_raw->children.size(); ++i) - structure.emplace_back("_" + toString(i + 1), type_list_raw->children[i]->formatWithPossiblyHidingSensitiveData(0, true, true)); + structure.emplace_back("_" + toString(i + 1), type_list_raw->children[i]->formatWithPossiblyHidingSensitiveData(0, true, true, false)); } void BaseExternalTable::initSampleBlock() diff --git a/src/Core/Field.h b/src/Core/Field.h index f1bb4a72b0d..ba8c66580ad 100644 --- a/src/Core/Field.h +++ b/src/Core/Field.h @@ -457,15 +457,6 @@ public: std::string_view getTypeName() const; bool isNull() const { return which == Types::Null; } - template - NearestFieldType> & get(); - - template - const auto & get() const - { - auto * mutable_this = const_cast *>(this); - return mutable_this->get(); - } bool isNegativeInfinity() const { return which == Types::Null && get().isNegativeInfinity(); } bool isPositiveInfinity() const { return which == Types::Null && get().isPositiveInfinity(); } @@ -681,6 +672,25 @@ private: Types::Which which; + /// This function is prone to type punning and should never be used outside of Field class, + /// whenever it is used within this class the stored type should be checked in advance. + template + NearestFieldType> & get() + { + // Before storing the value in the Field, we static_cast it to the field + // storage type, so here we return the value of storage type as well. + // Otherwise, it is easy to make a mistake of reinterpret_casting the stored + // value to a different and incompatible type. + // For example, a Float32 value is stored as Float64, and it is incorrect to + // return a reference to this value as Float32. + return *reinterpret_cast>*>(&storage); + } + + template + NearestFieldType> & get() const + { + return const_cast(this)->get(); + } /// Assuming there was no allocated state or it was deallocated (see destroy). template @@ -866,48 +876,21 @@ constexpr bool isInt64OrUInt64orBoolFieldType(Field::Types::Which t) || t == Field::Types::Bool; } -// Field value getter with type checking in debug builds. -template -NearestFieldType> & Field::get() -{ - // Before storing the value in the Field, we static_cast it to the field - // storage type, so here we return the value of storage type as well. - // Otherwise, it is easy to make a mistake of reinterpret_casting the stored - // value to a different and incompatible type. - // For example, a Float32 value is stored as Float64, and it is incorrect to - // return a reference to this value as Float32. - using StoredType = NearestFieldType>; - -#ifndef NDEBUG - // Disregard signedness when converting between int64 types. - constexpr Field::Types::Which target = TypeToEnum::value; - if (target != which - && (!isInt64OrUInt64orBoolFieldType(target) || !isInt64OrUInt64orBoolFieldType(which)) && target != Field::Types::IPv4) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Invalid Field get from type {} to type {}", which, target); -#endif - - StoredType * MAY_ALIAS ptr = reinterpret_cast(&storage); - - return *ptr; -} - - template auto & Field::safeGet() { const Types::Which target = TypeToEnum>>::value; - /// We allow converting int64 <-> uint64, int64 <-> bool, uint64 <-> bool in safeGet(). - if (target != which - && (!isInt64OrUInt64orBoolFieldType(target) || !isInt64OrUInt64orBoolFieldType(which))) - throw Exception(ErrorCodes::BAD_GET, - "Bad get: has {}, requested {}", getTypeName(), target); + /// bool is stored as uint64, will be returned as UInt64 when requested as bool or UInt64, as Int64 when requested as Int64 + /// also allow UInt64 <-> Int64 conversion + if (target != which && + !(which == Field::Types::Bool && (target == Field::Types::UInt64 || target == Field::Types::Int64)) && + !(isInt64OrUInt64FieldType(which) && isInt64OrUInt64FieldType(target))) + throw Exception(ErrorCodes::BAD_GET, "Bad get: has {}, requested {}", getTypeName(), target); return get(); } - template requires not_field_or_bool_or_stringlike Field::Field(T && rhs) diff --git a/src/Core/MySQL/MySQLGtid.cpp b/src/Core/MySQL/MySQLGtid.cpp index 7916f882979..28b583a0cfe 100644 --- a/src/Core/MySQL/MySQLGtid.cpp +++ b/src/Core/MySQL/MySQLGtid.cpp @@ -24,9 +24,7 @@ void GTIDSet::tryMerge(size_t i) void GTIDSets::parse(String gtid_format) { if (gtid_format.empty()) - { return; - } std::vector gtid_sets; boost::split(gtid_sets, gtid_format, [](char c) { return c == ','; }); diff --git a/src/Core/MySQL/tests/gtest_MySQLGtid.cpp b/src/Core/MySQL/tests/gtest_MySQLGtid.cpp index e31a87aaa39..e5a2fe44e5c 100644 --- a/src/Core/MySQL/tests/gtest_MySQLGtid.cpp +++ b/src/Core/MySQL/tests/gtest_MySQLGtid.cpp @@ -10,20 +10,19 @@ GTEST_TEST(GTIDSetsContains, Tests) contained1, contained2, contained3, contained4, contained5, not_contained1, not_contained2, not_contained3, not_contained4, not_contained5, not_contained6; - gtid_set.parse("2174B383-5441-11E8-B90A-C80AA9429562:1-3:11:47-49, 24DA167-0C0C-11E8-8442-00059A3C7B00:1-19:47-49:60"); - contained1.parse("2174B383-5441-11E8-B90A-C80AA9429562:1-3:11:47-49, 24DA167-0C0C-11E8-8442-00059A3C7B00:1-19:47-49:60"); + gtid_set.parse("2174B383-5441-11E8-B90A-C80AA9429562:1-3:11:47-49, FBC30C64-F8C9-4DDF-8CDD-066208EB433B:1-19:47-49:60"); + contained1.parse("2174B383-5441-11E8-B90A-C80AA9429562:1-3:11:47-49, FBC30C64-F8C9-4DDF-8CDD-066208EB433B:1-19:47-49:60"); contained2.parse("2174B383-5441-11E8-B90A-C80AA9429562:2-3:11:47-49"); contained3.parse("2174B383-5441-11E8-B90A-C80AA9429562:11"); - contained4.parse("24DA167-0C0C-11E8-8442-00059A3C7B00:2-16:47-49:60"); - contained5.parse("24DA167-0C0C-11E8-8442-00059A3C7B00:60"); + contained4.parse("FBC30C64-F8C9-4DDF-8CDD-066208EB433B:2-16:47-49:60"); + contained5.parse("FBC30C64-F8C9-4DDF-8CDD-066208EB433B:60"); - not_contained1.parse("2174B383-5441-11E8-B90A-C80AA9429562:1-3:11:47-50, 24DA167-0C0C-11E8-8442-00059A3C7B00:1-19:47-49:60"); + not_contained1.parse("2174B383-5441-11E8-B90A-C80AA9429562:1-3:11:47-50, FBC30C64-F8C9-4DDF-8CDD-066208EB433B:1-19:47-49:60"); not_contained2.parse("2174B383-5441-11E8-B90A-C80AA9429562:0-3:11:47-49"); not_contained3.parse("2174B383-5441-11E8-B90A-C80AA9429562:99"); - not_contained4.parse("24DA167-0C0C-11E8-8442-00059A3C7B00:2-16:46-49:60"); - not_contained5.parse("24DA167-0C0C-11E8-8442-00059A3C7B00:99"); - not_contained6.parse("2174B383-5441-11E8-B90A-C80AA9429562:1-3:11:47-49, 24DA167-0C0C-11E8-8442-00059A3C7B00:1-19:47-49:60, 00000000-0000-0000-0000-000000000000"); - + not_contained4.parse("FBC30C64-F8C9-4DDF-8CDD-066208EB433B:2-16:46-49:60"); + not_contained5.parse("FBC30C64-F8C9-4DDF-8CDD-066208EB433B:99"); + not_contained6.parse("2174B383-5441-11E8-B90A-C80AA9429562:1-3:11:47-49, FBC30C64-F8C9-4DDF-8CDD-066208EB433B:1-19:47-49:60, 00000000-0000-0000-0000-000000000000"); ASSERT_TRUE(gtid_set.contains(contained1)); ASSERT_TRUE(gtid_set.contains(contained2)); diff --git a/src/Core/PostgreSQL/PoolWithFailover.cpp b/src/Core/PostgreSQL/PoolWithFailover.cpp index 5014564dbe0..054fc3b2226 100644 --- a/src/Core/PostgreSQL/PoolWithFailover.cpp +++ b/src/Core/PostgreSQL/PoolWithFailover.cpp @@ -23,7 +23,7 @@ namespace postgres { PoolWithFailover::PoolWithFailover( - const DB::ExternalDataSourcesConfigurationByPriority & configurations_by_priority, + const ReplicasConfigurationByPriority & configurations_by_priority, size_t pool_size, size_t pool_wait_timeout_, size_t max_tries_, diff --git a/src/Core/PostgreSQL/PoolWithFailover.h b/src/Core/PostgreSQL/PoolWithFailover.h index 502a9a9b7d7..2237c752367 100644 --- a/src/Core/PostgreSQL/PoolWithFailover.h +++ b/src/Core/PostgreSQL/PoolWithFailover.h @@ -8,7 +8,6 @@ #include "ConnectionHolder.h" #include #include -#include #include @@ -20,12 +19,12 @@ namespace postgres class PoolWithFailover { - -using RemoteDescription = std::vector>; - public: + using ReplicasConfigurationByPriority = std::map>; + using RemoteDescription = std::vector>; + PoolWithFailover( - const DB::ExternalDataSourcesConfigurationByPriority & configurations_by_priority, + const ReplicasConfigurationByPriority & configurations_by_priority, size_t pool_size, size_t pool_wait_timeout, size_t max_tries_, diff --git a/src/Core/ProtocolDefines.h b/src/Core/ProtocolDefines.h index 7e6893c6d85..790987272fa 100644 --- a/src/Core/ProtocolDefines.h +++ b/src/Core/ProtocolDefines.h @@ -81,6 +81,11 @@ static constexpr auto DBMS_MIN_REVISION_WITH_TABLE_READ_ONLY_CHECK = 54467; static constexpr auto DBMS_MIN_REVISION_WITH_SYSTEM_KEYWORDS_TABLE = 54468; +static constexpr auto DBMS_MIN_REVISION_WITH_ROWS_BEFORE_AGGREGATION = 54469; + +/// Packets size header +static constexpr auto DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS = 54470; + /// Version of ClickHouse TCP protocol. /// /// Should be incremented manually on protocol changes. @@ -88,6 +93,6 @@ static constexpr auto DBMS_MIN_REVISION_WITH_SYSTEM_KEYWORDS_TABLE = 54468; /// NOTE: DBMS_TCP_PROTOCOL_VERSION has nothing common with VERSION_REVISION, /// later is just a number for server version (one number instead of commit SHA) /// for simplicity (sometimes it may be more convenient in some use cases). -static constexpr auto DBMS_TCP_PROTOCOL_VERSION = 54468; +static constexpr auto DBMS_TCP_PROTOCOL_VERSION = 54470; } diff --git a/src/Core/Range.cpp b/src/Core/Range.cpp index 956b96653a1..1a5ce1e012e 100644 --- a/src/Core/Range.cpp +++ b/src/Core/Range.cpp @@ -62,27 +62,27 @@ void Range::shrinkToIncludedIfPossible() { if (left.isExplicit() && !left_included) { - if (left.getType() == Field::Types::UInt64 && left.get() != std::numeric_limits::max()) + if (left.getType() == Field::Types::UInt64 && left.safeGet() != std::numeric_limits::max()) { - ++left.get(); + ++left.safeGet(); left_included = true; } - if (left.getType() == Field::Types::Int64 && left.get() != std::numeric_limits::max()) + if (left.getType() == Field::Types::Int64 && left.safeGet() != std::numeric_limits::max()) { - ++left.get(); + ++left.safeGet(); left_included = true; } } if (right.isExplicit() && !right_included) { - if (right.getType() == Field::Types::UInt64 && right.get() != std::numeric_limits::min()) + if (right.getType() == Field::Types::UInt64 && right.safeGet() != std::numeric_limits::min()) { - --right.get(); + --right.safeGet(); right_included = true; } - if (right.getType() == Field::Types::Int64 && right.get() != std::numeric_limits::min()) + if (right.getType() == Field::Types::Int64 && right.safeGet() != std::numeric_limits::min()) { - --right.get(); + --right.safeGet(); right_included = true; } } diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index d13e6251ca9..79173503f28 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -106,6 +106,8 @@ namespace DB M(UInt64, dns_cache_max_entries, 10000, "Internal DNS cache max entries.", 0) \ M(Int32, dns_cache_update_period, 15, "Internal DNS cache update period in seconds.", 0) \ M(UInt32, dns_max_consecutive_failures, 10, "Max DNS resolve failures of a hostname before dropping the hostname from ClickHouse DNS cache.", 0) \ + M(Bool, dns_allow_resolve_names_to_ipv4, true, "Allows resolve names to ipv4 addresses.", 0) \ + M(Bool, dns_allow_resolve_names_to_ipv6, true, "Allows resolve names to ipv6 addresses.", 0) \ \ M(UInt64, max_table_size_to_drop, 50000000000lu, "If size of a table is greater than this value (in bytes) than table could not be dropped with any DROP query.", 0) \ M(UInt64, max_partition_size_to_drop, 50000000000lu, "Same as max_table_size_to_drop, but for the partitions.", 0) \ @@ -134,6 +136,7 @@ namespace DB M(Bool, async_load_databases, false, "Enable asynchronous loading of databases and tables to speedup server startup. Queries to not yet loaded entity will be blocked until load is finished.", 0) \ M(Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0) \ M(Seconds, keep_alive_timeout, DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT, "The number of seconds that ClickHouse waits for incoming requests before closing the connection.", 0) \ + M(UInt64, max_keep_alive_requests, 10000, "The maximum number of requests handled via a single http keepalive connection before the server closes this connection.", 0) \ M(Seconds, replicated_fetches_http_connection_timeout, 0, "HTTP connection timeout for part fetch requests. Inherited from default profile `http_connection_timeout` if not set explicitly.", 0) \ M(Seconds, replicated_fetches_http_send_timeout, 0, "HTTP send timeout for part fetch requests. Inherited from default profile `http_send_timeout` if not set explicitly.", 0) \ M(Seconds, replicated_fetches_http_receive_timeout, 0, "HTTP receive timeout for fetch part requests. Inherited from default profile `http_receive_timeout` if not set explicitly.", 0) \ diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp index 9c9c9c1db00..45bd2b9eb42 100644 --- a/src/Core/Settings.cpp +++ b/src/Core/Settings.cpp @@ -118,7 +118,7 @@ void Settings::set(std::string_view name, const Field & value) { if (value.getType() != Field::Types::Which::String) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected type of value for setting 'compatibility'. Expected String, got {}", value.getTypeName()); - applyCompatibilitySetting(value.get()); + applyCompatibilitySetting(value.safeGet()); } /// If we change setting that was changed by compatibility setting before /// we should remove it from settings_changed_by_compatibility_setting, diff --git a/src/Core/Settings.h b/src/Core/Settings.h index b3db27f8aef..479d5939b57 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -325,6 +325,7 @@ class IColumn; \ M(Bool, join_use_nulls, false, "Use NULLs for non-joined rows of outer JOINs for types that can be inside Nullable. If false, use default value of corresponding columns data type.", IMPORTANT) \ \ + M(Int32, join_output_by_rowlist_perkey_rows_threshold, 5, "The lower limit of per-key average rows in the right table to determine whether to output by row list in hash join.", 0) \ M(JoinStrictness, join_default_strictness, JoinStrictness::All, "Set default strictness in JOIN query. Possible values: empty string, 'ANY', 'ALL'. If empty, query without strictness will throw exception.", 0) \ M(Bool, any_join_distinct_right_table_keys, false, "Enable old ANY JOIN logic with many-to-one left-to-right table keys mapping for all ANY JOINs. It leads to confusing not equal results for 't1 ANY LEFT JOIN t2' and 't2 ANY RIGHT JOIN t1'. ANY RIGHT JOIN needs one-to-many keys mapping to be consistent with LEFT one.", IMPORTANT) \ M(Bool, single_join_prefer_left_table, true, "For single JOIN in case of identifier ambiguity prefer left table", IMPORTANT) \ @@ -593,7 +594,6 @@ class IColumn; M(UInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 2 - wait all replicas if they exist.", 0) \ M(Bool, enable_lightweight_delete, true, "Enable lightweight DELETE mutations for mergetree tables.", 0) ALIAS(allow_experimental_lightweight_delete) \ M(UInt64, lightweight_deletes_sync, 2, "The same as 'mutation_sync', but controls only execution of lightweight deletes", 0) \ - M(LightweightMutationProjectionMode, lightweight_mutation_projection_mode, LightweightMutationProjectionMode::THROW, "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop all projection related to this table then do lightweight delete.", 0) \ M(Bool, apply_deleted_mask, true, "Enables filtering out rows deleted with lightweight DELETE. If disabled, a query will be able to read those rows. This is useful for debugging and \"undelete\" scenarios", 0) \ M(Bool, optimize_normalize_count_variants, true, "Rewrite aggregate functions that semantically equals to count() as count().", 0) \ M(Bool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \ @@ -605,7 +605,7 @@ class IColumn; M(Bool, optimize_if_chain_to_multiif, false, "Replace if(cond1, then1, if(cond2, ...)) chains to multiIf. Currently it's not beneficial for numeric types.", 0) \ M(Bool, optimize_multiif_to_if, true, "Replace 'multiIf' with only one condition to 'if'.", 0) \ M(Bool, optimize_if_transform_strings_to_enum, false, "Replaces string-type arguments in If and Transform to enum. Disabled by default cause it could make inconsistent change in distributed query that would lead to its fail.", 0) \ - M(Bool, optimize_functions_to_subcolumns, false, "Transform functions to subcolumns, if possible, to reduce amount of read data. E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' ", 0) \ + M(Bool, optimize_functions_to_subcolumns, true, "Transform functions to subcolumns, if possible, to reduce amount of read data. E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' ", 0) \ M(Bool, optimize_using_constraints, false, "Use constraints for query optimization", 0) \ M(Bool, optimize_substitute_columns, false, "Use constraints for column substitution", 0) \ M(Bool, optimize_append_index, false, "Use constraints in order to append index condition (indexHint)", 0) \ @@ -616,6 +616,7 @@ class IColumn; M(Bool, throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert, true, "Throw exception on INSERT query when the setting `deduplicate_blocks_in_dependent_materialized_views` is enabled along with `async_insert`. It guarantees correctness, because these features can't work together.", 0) \ M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \ M(Bool, ignore_materialized_views_with_dropped_target_table, false, "Ignore MVs with dropped target table during pushing to views", 0) \ + M(Bool, allow_materialized_view_with_bad_select, true, "Allow CREATE MATERIALIZED VIEW with SELECT query that references nonexistent tables or columns. It must still be syntactically valid. Doesn't apply to refreshable MVs. Doesn't apply if the MV schema needs to be inferred from the SELECT query (i.e. if the CREATE has no column list and no TO table). Can be used for creating MV before its source table.", 0) \ M(Bool, use_compact_format_in_distributed_parts_names, true, "Changes format of directories names for distributed table insert parts.", 0) \ M(Bool, validate_polygons, true, "Throw exception if polygon is invalid in function pointInPolygon (e.g. self-tangent, self-intersecting). If the setting is false, the function will accept invalid polygons but may silently return wrong result.", 0) \ M(UInt64, max_parser_depth, DBMS_DEFAULT_MAX_PARSER_DEPTH, "Maximum parser depth (recursion depth of recursive descend parser).", 0) \ @@ -676,6 +677,7 @@ class IColumn; M(Bool, query_cache_squash_partial_results, true, "Squash partial result blocks to blocks of size 'max_block_size'. Reduces performance of inserts into the query cache but improves the compressability of cache entries.", 0) \ M(Seconds, query_cache_ttl, 60, "After this time in seconds entries in the query cache become stale", 0) \ M(Bool, query_cache_share_between_users, false, "Allow other users to read entry in the query cache", 0) \ + M(String, query_cache_tag, "", "A string which acts as a label for query cache entries. The same queries with different tags are considered different by the query cache.", 0) \ M(Bool, enable_sharing_sets_for_mutations, true, "Allow sharing set objects build for IN subqueries between different tasks of the same mutation. This reduces memory usage and CPU consumption", 0) \ \ M(Bool, optimize_rewrite_sum_if_to_count_if, true, "Rewrite sumIf() and sum(if()) function countIf() function when logically equivalent", 0) \ @@ -690,6 +692,7 @@ class IColumn; M(UInt64, max_size_to_preallocate_for_joins, 100'000'000, "For how many elements it is allowed to preallocate space in all hash tables in total before join", 0) \ \ M(Bool, kafka_disable_num_consumers_limit, false, "Disable limit on kafka_num_consumers that depends on the number of available CPU cores", 0) \ + M(Bool, allow_experimental_kafka_offsets_storage_in_keeper, false, "Allow experimental feature to store Kafka related offsets in ClickHouse Keeper. When enabled a ClickHouse Keeper path and replica name can be specified to the Kafka table engine. As a result instead of the regular Kafka engine, a new type of storage engine will be used that stores the committed offsets primarily in ClickHouse Keeper", 0) \ M(Bool, enable_software_prefetch_in_aggregation, true, "Enable use of software prefetch in aggregation", 0) \ M(Bool, allow_aggregate_partitions_independently, false, "Enable independent aggregation of partitions on separate threads when partition key suits group by key. Beneficial when number of partitions close to number of cores and partitions have roughly the same size", 0) \ M(Bool, force_aggregate_partitions_independently, false, "Force the use of optimization when it is applicable, but heuristics decided not to use it", 0) \ @@ -876,9 +879,10 @@ class IColumn; M(Bool, allow_get_client_http_header, false, "Allow to use the function `getClientHTTPHeader` which lets to obtain a value of an the current HTTP request's header. It is not enabled by default for security reasons, because some headers, such as `Cookie`, could contain sensitive info. Note that the `X-ClickHouse-*` and `Authentication` headers are always restricted and cannot be obtained with this function.", 0) \ M(Bool, cast_string_to_dynamic_use_inference, false, "Use types inference during String to Dynamic conversion", 0) \ M(Bool, enable_blob_storage_log, true, "Write information about blob storage operations to system.blob_storage_log table", 0) \ + M(Bool, use_json_alias_for_old_object_type, false, "When enabled, JSON type alias will create old experimental Object type instead of a new JSON type", 0) \ M(Bool, allow_create_index_without_type, false, "Allow CREATE INDEX query without TYPE. Query will be ignored. Made for SQL compatibility tests.", 0) \ M(Bool, create_index_ignore_unique, false, "Ignore UNIQUE keyword in CREATE UNIQUE INDEX. Made for SQL compatibility tests.", 0) \ - M(Bool, print_pretty_type_names, true, "Print pretty type names in DESCRIBE query and toTypeName() function", 0) \ + M(Bool, print_pretty_type_names, true, "Print pretty type names in the DESCRIBE query and `toTypeName` function, as well as in the `SHOW CREATE TABLE` query and the `formatQuery` function.", 0) \ M(Bool, create_table_empty_primary_key_by_default, false, "Allow to create *MergeTree tables with empty primary key when ORDER BY and PRIMARY KEY not specified", 0) \ M(Bool, allow_named_collection_override_by_default, true, "Allow named collections' fields override by default.", 0) \ M(SQLSecurityType, default_normal_view_sql_security, SQLSecurityType::INVOKER, "Allows to set a default value for SQL SECURITY option when creating a normal view.", 0) \ @@ -895,6 +899,7 @@ class IColumn; M(UInt64, extract_key_value_pairs_max_pairs_per_row, 1000, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. Used as a safeguard against consuming too much memory.", 0) ALIAS(extract_kvp_max_pairs_per_row) \ M(Bool, restore_replace_external_engines_to_null, false, "Replace all the external table engines to Null on restore. Useful for testing purposes", 0) \ M(Bool, restore_replace_external_table_functions_to_null, false, "Replace all table functions to Null on restore. Useful for testing purposes", 0) \ + M(Bool, create_if_not_exists, false, "Enable IF NOT EXISTS for CREATE statements by default", 0) \ \ \ /* ###################################### */ \ @@ -905,20 +910,20 @@ class IColumn; M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \ M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions", 0) \ M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \ + M(Bool, allow_experimental_time_series_table, false, "Allows experimental TimeSeries table engine", 0) \ + M(Bool, allow_experimental_vector_similarity_index, false, "Allow experimental vector similarity index", 0) \ M(Bool, allow_experimental_variant_type, false, "Allow Variant data type", 0) \ M(Bool, allow_experimental_dynamic_type, false, "Allow Dynamic data type", 0) \ - M(Bool, allow_experimental_annoy_index, false, "Allows to use Annoy index. Disabled by default because this feature is experimental", 0) \ - M(Bool, allow_experimental_usearch_index, false, "Allows to use USearch index. Disabled by default because this feature is experimental", 0) \ + M(Bool, allow_experimental_json_type, false, "Allow JSON data type", 0) \ M(Bool, allow_experimental_codecs, false, "If it is set to true, allow to specify experimental compression codecs (but we don't have those yet and this option does nothing).", 0) \ M(UInt64, max_limit_for_ann_queries, 1'000'000, "SELECT queries with LIMIT bigger than this setting cannot use ANN indexes. Helps to prevent memory overflows in ANN search indexes.", 0) \ - M(UInt64, max_threads_for_annoy_index_creation, 4, "Number of threads used to build Annoy indexes (0 means all cores, not recommended)", 0) \ - M(Int64, annoy_index_search_k_nodes, -1, "SELECT queries search up to this many nodes in Annoy indexes.", 0) \ M(Bool, throw_on_unsupported_query_inside_transaction, true, "Throw exception if unsupported query is used inside transaction", 0) \ M(TransactionsWaitCSNMode, wait_changes_become_visible_after_commit_mode, TransactionsWaitCSNMode::WAIT_UNKNOWN, "Wait for committed changes to become actually visible in the latest snapshot", 0) \ M(Bool, implicit_transaction, false, "If enabled and not already inside a transaction, wraps the query inside a full transaction (begin + commit or rollback)", 0) \ M(UInt64, grace_hash_join_initial_buckets, 1, "Initial number of grace hash join buckets", 0) \ M(UInt64, grace_hash_join_max_buckets, 1024, "Limit on the number of grace hash join buckets", 0) \ M(Timezone, session_timezone, "", "This setting can be removed in the future due to potential caveats. It is experimental and is not suitable for production usage. The default timezone for current session or query. The server default timezone if empty.", 0) \ + M(Bool, use_hive_partitioning, false, "Allows to use hive partitioning for File, URL, S3, AzureBlobStorage and HDFS engines.", 0)\ \ M(Bool, allow_statistics_optimize, false, "Allows using statistics to optimize queries", 0) ALIAS(allow_statistic_optimize) \ M(Bool, allow_experimental_statistics, false, "Allows using statistics", 0) ALIAS(allow_experimental_statistic) \ @@ -1034,6 +1039,10 @@ class IColumn; MAKE_OBSOLETE(M, UInt64, parallel_replicas_min_number_of_granules_to_enable, 0) \ MAKE_OBSOLETE(M, Bool, query_plan_optimize_projection, true) \ MAKE_OBSOLETE(M, Bool, query_cache_store_results_of_queries_with_nondeterministic_functions, false) \ + MAKE_OBSOLETE(M, Bool, allow_experimental_annoy_index, false) \ + MAKE_OBSOLETE(M, UInt64, max_threads_for_annoy_index_creation, 4) \ + MAKE_OBSOLETE(M, Int64, annoy_index_search_k_nodes, -1) \ + MAKE_OBSOLETE(M, Bool, allow_experimental_usearch_index, false) \ MAKE_OBSOLETE(M, Bool, optimize_move_functions_out_of_any, false) \ MAKE_OBSOLETE(M, Bool, allow_experimental_undrop_table_query, true) \ MAKE_OBSOLETE(M, Bool, allow_experimental_s3queue, true) \ @@ -1127,10 +1136,13 @@ class IColumn; M(Bool, input_format_json_defaults_for_missing_elements_in_named_tuple, true, "Insert default value in named tuple element if it's missing in json object", 0) \ M(Bool, input_format_json_throw_on_bad_escape_sequence, true, "Throw an exception if JSON string contains bad escape sequence in JSON input formats. If disabled, bad escape sequences will remain as is in the data", 0) \ M(Bool, input_format_json_ignore_unnecessary_fields, true, "Ignore unnecessary fields and not parse them. Enabling this may not throw exceptions on json strings of invalid format or with duplicated fields", 0) \ + M(Bool, input_format_try_infer_variants, false, "Try to infer the Variant type in text formats when there is more than one possible type for column/array elements", 0) \ + M(Bool, type_json_skip_duplicated_paths, false, "When enabled, during parsing JSON object into JSON type duplicated paths will be ignored and only the first one will be inserted instead of an exception", 0) \ M(UInt64, input_format_json_max_depth, 1000, "Maximum depth of a field in JSON. This is not a strict limit, it does not have to be applied precisely.", 0) \ M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_datetimes, true, "Try to infer datetimes from string fields while schema inference in text formats", 0) \ + M(Bool, input_format_try_infer_datetimes_only_datetime64, false, "When input_format_try_infer_datetimes is enabled, infer only DateTime64 but not DateTime types", 0) \ M(Bool, input_format_try_infer_exponent_floats, false, "Try to infer floats in exponential notation while schema inference in text formats (except JSON, where exponent numbers are always inferred)", 0) \ M(Bool, output_format_markdown_escape_special_characters, false, "Escape special characters in Markdown", 0) \ M(Bool, input_format_protobuf_flatten_google_wrappers, false, "Enable Google wrappers for regular non-nested columns, e.g. google.protobuf.StringValue 'str' for String column 'str'. For Nullable columns empty wrappers are recognized as defaults, and missing as nulls", 0) \ @@ -1241,6 +1253,7 @@ class IColumn; M(Bool, insert_distributed_one_random_shard, false, "If setting is enabled, inserting into distributed table will choose a random shard to write when there is no sharding key", 0) \ \ M(Bool, exact_rows_before_limit, false, "When enabled, ClickHouse will provide exact value for rows_before_limit_at_least statistic, but with the cost that the data before limit will have to be read completely", 0) \ + M(Bool, rows_before_aggregation, false, "When enabled, ClickHouse will provide exact value for rows_before_aggregation statistic, represents the number of rows read before aggregation", 0) \ M(UInt64, cross_to_inner_join_rewrite, 1, "Use inner join instead of comma/cross join if there are joining expressions in the WHERE section. Values: 0 - no rewrite, 1 - apply if possible for comma/cross, 2 - force rewrite all comma joins, cross - if possible", 0) \ \ M(Bool, output_format_arrow_low_cardinality_as_dictionary, false, "Enable output LowCardinality type as Dictionary Arrow type", 0) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 41d5180ead0..5e831c6301c 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -71,16 +71,31 @@ static std::initializer_list(f.get()); + return stringToNumber(f.safeGet()); } else if (f.getType() == Field::Types::UInt64) { T result; - if (!accurate::convertNumeric(f.get(), result)) + if (!accurate::convertNumeric(f.safeGet(), result)) throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Field value {} is out of range of {} type", f, demangle(typeid(T).name())); return result; } else if (f.getType() == Field::Types::Int64) { T result; - if (!accurate::convertNumeric(f.get(), result)) + if (!accurate::convertNumeric(f.safeGet(), result)) throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Field value {} is out of range of {} type", f, demangle(typeid(T).name())); return result; } else if (f.getType() == Field::Types::Bool) { - return T(f.get()); + return T(f.safeGet()); } else if (f.getType() == Field::Types::Float64) { - Float64 x = f.get(); + Float64 x = f.safeGet(); if constexpr (std::is_floating_point_v) { return T(x); @@ -120,7 +120,7 @@ namespace if (f.getType() == Field::Types::String) { /// Allow to parse Map from string field. For the convenience. - const auto & str = f.get(); + const auto & str = f.safeGet(); return stringToMap(str); } @@ -218,7 +218,7 @@ namespace UInt64 fieldToMaxThreads(const Field & f) { if (f.getType() == Field::Types::String) - return stringToMaxThreads(f.get()); + return stringToMaxThreads(f.safeGet()); else return fieldToNumber(f); } @@ -237,7 +237,7 @@ SettingFieldMaxThreads & SettingFieldMaxThreads::operator=(const Field & f) String SettingFieldMaxThreads::toString() const { if (is_auto) - return "'auto(" + ::DB::toString(value) + ")'"; + return "auto(" + ::DB::toString(value) + ")"; else return ::DB::toString(value); } diff --git a/src/Core/SettingsFields.h b/src/Core/SettingsFields.h index 266141815e3..533d69f3fbb 100644 --- a/src/Core/SettingsFields.h +++ b/src/Core/SettingsFields.h @@ -153,7 +153,7 @@ struct SettingFieldMaxThreads operator UInt64() const { return value; } /// NOLINT explicit operator Field() const { return value; } - /// Writes "auto()" instead of simple "" if `is_auto==true`. + /// Writes "auto()" instead of simple "" if `is_auto == true`. String toString() const; void parseFromString(const String & str); diff --git a/src/Core/SettingsQuirks.cpp b/src/Core/SettingsQuirks.cpp index 5541cc19653..3127a5ef36d 100644 --- a/src/Core/SettingsQuirks.cpp +++ b/src/Core/SettingsQuirks.cpp @@ -100,7 +100,7 @@ void doSettingsSanityCheckClamp(Settings & current_settings, LoggerPtr log) return current_value; }; - UInt64 max_threads = get_current_value("max_threads").get(); + UInt64 max_threads = get_current_value("max_threads").safeGet(); UInt64 max_threads_max_value = 256 * getNumberOfPhysicalCPUCores(); if (max_threads > max_threads_max_value) { @@ -120,7 +120,7 @@ void doSettingsSanityCheckClamp(Settings & current_settings, LoggerPtr log) "input_format_parquet_max_block_size"}; for (auto const & setting : block_rows_settings) { - if (auto block_size = get_current_value(setting).get(); + if (auto block_size = get_current_value(setting).safeGet(); block_size > max_sane_block_rows_size) { if (log) @@ -129,7 +129,7 @@ void doSettingsSanityCheckClamp(Settings & current_settings, LoggerPtr log) } } - if (auto max_block_size = get_current_value("max_block_size").get(); max_block_size == 0) + if (auto max_block_size = get_current_value("max_block_size").safeGet(); max_block_size == 0) { if (log) LOG_WARNING(log, "Sanity check: 'max_block_size' cannot be 0. Set to default value {}", DEFAULT_BLOCK_SIZE); diff --git a/src/Core/TypeId.h b/src/Core/TypeId.h index e4f850cbb59..1eba944e63e 100644 --- a/src/Core/TypeId.h +++ b/src/Core/TypeId.h @@ -45,6 +45,7 @@ enum class TypeIndex : uint8_t AggregateFunction, LowCardinality, Map, + ObjectDeprecated, Object, IPv4, IPv6, diff --git a/src/Core/callOnTypeIndex.h b/src/Core/callOnTypeIndex.h index f5f67df563b..ae5afce36be 100644 --- a/src/Core/callOnTypeIndex.h +++ b/src/Core/callOnTypeIndex.h @@ -3,6 +3,7 @@ #include #include +#include namespace DB @@ -212,6 +213,8 @@ static bool callOnIndexAndDataType(TypeIndex number, F && f, ExtraArgs && ... ar case TypeIndex::IPv4: return f(TypePair(), std::forward(args)...); case TypeIndex::IPv6: return f(TypePair(), std::forward(args)...); + case TypeIndex::Interval: return f(TypePair(), std::forward(args)...); + default: break; } diff --git a/src/Core/examples/field.cpp b/src/Core/examples/field.cpp index 110e11d0cb1..3064290e127 100644 --- a/src/Core/examples/field.cpp +++ b/src/Core/examples/field.cpp @@ -37,7 +37,7 @@ int main(int argc, char ** argv) std::cerr << applyVisitor(to_string, field) << std::endl; } - field.get().push_back(field); + field.safeGet().push_back(field); std::cerr << applyVisitor(to_string, field) << std::endl; std::cerr << (field < field2) << std::endl; diff --git a/src/Core/fuzzers/CMakeLists.txt b/src/Core/fuzzers/CMakeLists.txt index 3c5c0eed4e6..61d6b9629eb 100644 --- a/src/Core/fuzzers/CMakeLists.txt +++ b/src/Core/fuzzers/CMakeLists.txt @@ -1,2 +1,2 @@ clickhouse_add_executable (names_and_types_fuzzer names_and_types_fuzzer.cpp) -target_link_libraries (names_and_types_fuzzer PRIVATE dbms clickhouse_functions) +target_link_libraries (names_and_types_fuzzer PRIVATE clickhouse_functions) diff --git a/src/Core/tests/gtest_field.cpp b/src/Core/tests/gtest_field.cpp index 5585442d835..7e778be9575 100644 --- a/src/Core/tests/gtest_field.cpp +++ b/src/Core/tests/gtest_field.cpp @@ -8,31 +8,31 @@ GTEST_TEST(Field, FromBool) { Field f{false}; ASSERT_EQ(f.getType(), Field::Types::Bool); - ASSERT_EQ(f.get(), 0); - ASSERT_EQ(f.get(), false); + ASSERT_EQ(f.safeGet(), 0); + ASSERT_EQ(f.safeGet(), false); } { Field f{true}; ASSERT_EQ(f.getType(), Field::Types::Bool); - ASSERT_EQ(f.get(), 1); - ASSERT_EQ(f.get(), true); + ASSERT_EQ(f.safeGet(), 1); + ASSERT_EQ(f.safeGet(), true); } { Field f; f = false; ASSERT_EQ(f.getType(), Field::Types::Bool); - ASSERT_EQ(f.get(), 0); - ASSERT_EQ(f.get(), false); + ASSERT_EQ(f.safeGet(), 0); + ASSERT_EQ(f.safeGet(), false); } { Field f; f = true; ASSERT_EQ(f.getType(), Field::Types::Bool); - ASSERT_EQ(f.get(), 1); - ASSERT_EQ(f.get(), true); + ASSERT_EQ(f.safeGet(), 1); + ASSERT_EQ(f.safeGet(), true); } } @@ -42,15 +42,15 @@ GTEST_TEST(Field, Move) Field f; f = Field{String{"Hello, world (1)"}}; - ASSERT_EQ(f.get(), "Hello, world (1)"); + ASSERT_EQ(f.safeGet(), "Hello, world (1)"); f = Field{String{"Hello, world (2)"}}; - ASSERT_EQ(f.get(), "Hello, world (2)"); + ASSERT_EQ(f.safeGet(), "Hello, world (2)"); f = Field{Array{Field{String{"Hello, world (3)"}}}}; - ASSERT_EQ(f.get()[0].get(), "Hello, world (3)"); + ASSERT_EQ(f.safeGet()[0].safeGet(), "Hello, world (3)"); f = String{"Hello, world (4)"}; - ASSERT_EQ(f.get(), "Hello, world (4)"); + ASSERT_EQ(f.safeGet(), "Hello, world (4)"); f = Array{Field{String{"Hello, world (5)"}}}; - ASSERT_EQ(f.get()[0].get(), "Hello, world (5)"); + ASSERT_EQ(f.safeGet()[0].safeGet(), "Hello, world (5)"); f = Array{String{"Hello, world (6)"}}; - ASSERT_EQ(f.get()[0].get(), "Hello, world (6)"); + ASSERT_EQ(f.safeGet()[0].safeGet(), "Hello, world (6)"); } diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index e7ae8ea5a1d..c42bf7641d2 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -16,39 +16,29 @@ #include #if defined(OS_LINUX) - #include +#include #endif #include #include #include - #include #include #include -#include #include -#include #include #include #include #include #include - #include #include #include -#include #include -#include #include -#include #include -#include #include -#include -#include #include #include #include @@ -459,17 +449,9 @@ void BaseDaemon::initializeTerminationAndSignalProcessing() signal_listener_thread.start(*signal_listener); #if defined(__ELF__) && !defined(OS_FREEBSD) - String build_id_hex = SymbolIndex::instance().getBuildIDHex(); - if (build_id_hex.empty()) - build_id = ""; - else - build_id = build_id_hex; -#else - build_id = ""; + build_id = SymbolIndex::instance().getBuildIDHex(); #endif - git_hash = GIT_HASH; - #if defined(OS_LINUX) std::string executable_path = getExecutablePath(); @@ -482,7 +464,7 @@ void BaseDaemon::logRevision() const { logger().information("Starting " + std::string{VERSION_FULL} + " (revision: " + std::to_string(ClickHouseRevision::getVersionRevision()) - + ", git hash: " + (git_hash.empty() ? "" : git_hash) + + ", git hash: " + std::string(GIT_HASH) + ", build id: " + (build_id.empty() ? "" : build_id) + ")" + ", PID " + std::to_string(getpid())); } diff --git a/src/Daemon/BaseDaemon.h b/src/Daemon/BaseDaemon.h index b15aa74fcf3..a6efa94a567 100644 --- a/src/Daemon/BaseDaemon.h +++ b/src/Daemon/BaseDaemon.h @@ -165,7 +165,6 @@ protected: Poco::Util::AbstractConfiguration * last_configuration = nullptr; String build_id; - String git_hash; String stored_binary_hash; bool should_setup_watchdog = false; diff --git a/src/Daemon/CMakeLists.txt b/src/Daemon/CMakeLists.txt index 35ea2122dbb..2068af2200d 100644 --- a/src/Daemon/CMakeLists.txt +++ b/src/Daemon/CMakeLists.txt @@ -1,10 +1,7 @@ -configure_file(GitHash.cpp.in GitHash.generated.cpp) - add_library (daemon BaseDaemon.cpp GraphiteWriter.cpp SentryWriter.cpp - GitHash.generated.cpp ) target_link_libraries (daemon PUBLIC loggers common PRIVATE clickhouse_parsers clickhouse_common_io clickhouse_common_config) diff --git a/src/DataTypes/DataTypeAggregateFunction.cpp b/src/DataTypes/DataTypeAggregateFunction.cpp index a4cd3b9e511..1facaaab0d6 100644 --- a/src/DataTypes/DataTypeAggregateFunction.cpp +++ b/src/DataTypes/DataTypeAggregateFunction.cpp @@ -129,7 +129,7 @@ MutableColumnPtr DataTypeAggregateFunction::createColumn() const Field DataTypeAggregateFunction::getDefault() const { Field field = AggregateFunctionStateData(); - field.get().name = getName(); + field.safeGet().name = getName(); AlignedBuffer place_buffer(function->sizeOfData(), function->alignOfData()); AggregateDataPtr place = place_buffer.data(); @@ -138,7 +138,7 @@ Field DataTypeAggregateFunction::getDefault() const try { - WriteBufferFromString buffer_from_field(field.get().data); + WriteBufferFromString buffer_from_field(field.safeGet().data); function->serialize(place, buffer_from_field, version); } catch (...) diff --git a/src/DataTypes/DataTypeCustomGeo.cpp b/src/DataTypes/DataTypeCustomGeo.cpp index 0736d837d46..f90788ec403 100644 --- a/src/DataTypes/DataTypeCustomGeo.cpp +++ b/src/DataTypes/DataTypeCustomGeo.cpp @@ -24,6 +24,13 @@ void registerDataTypeDomainGeo(DataTypeFactory & factory) std::make_unique(std::make_unique())); }); + // Custom type for multiple lines stored as Array(LineString) + factory.registerSimpleDataTypeCustom("MultiLineString", [] + { + return std::make_pair(DataTypeFactory::instance().get("Array(LineString)"), + std::make_unique(std::make_unique())); + }); + // Custom type for simple polygon without holes stored as Array(Point) factory.registerSimpleDataTypeCustom("Ring", [] { diff --git a/src/DataTypes/DataTypeCustomGeo.h b/src/DataTypes/DataTypeCustomGeo.h index 0a1c83e4638..6a632f0d05c 100644 --- a/src/DataTypes/DataTypeCustomGeo.h +++ b/src/DataTypes/DataTypeCustomGeo.h @@ -17,6 +17,12 @@ public: DataTypeLineStringName() : DataTypeCustomFixedName("LineString") {} }; +class DataTypeMultiLineStringName : public DataTypeCustomFixedName +{ +public: + DataTypeMultiLineStringName() : DataTypeCustomFixedName("MultiLineString") {} +}; + class DataTypeRingName : public DataTypeCustomFixedName { public: diff --git a/src/DataTypes/DataTypeDynamic.cpp b/src/DataTypes/DataTypeDynamic.cpp index a1b1f8325f0..c35f7526a18 100644 --- a/src/DataTypes/DataTypeDynamic.cpp +++ b/src/DataTypes/DataTypeDynamic.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -14,6 +15,8 @@ #include #include #include +#include +#include namespace DB { @@ -65,16 +68,20 @@ static DataTypePtr create(const ASTPtr & arguments) if (!argument || argument->name != "equals") throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "Dynamic data type argument should be in a form 'max_types=N'"); - auto identifier_name = argument->arguments->children[0]->as()->name(); + const auto * identifier = argument->arguments->children[0]->as(); + if (!identifier) + throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "Unexpected Dynamic type argument: {}. Expected expression 'max_types=N'", identifier->formatForErrorMessage()); + + auto identifier_name = identifier->name(); if (identifier_name != "max_types") throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "Unexpected identifier: {}. Dynamic data type argument should be in a form 'max_types=N'", identifier_name); auto * literal = argument->arguments->children[1]->as(); - if (!literal || literal->value.getType() != Field::Types::UInt64 || literal->value.get() == 0 || literal->value.get() > ColumnVariant::MAX_NESTED_COLUMNS) - throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "'max_types' argument for Dynamic type should be a positive integer between 1 and 255"); + if (!literal || literal->value.getType() != Field::Types::UInt64 || literal->value.safeGet() > ColumnDynamic::MAX_DYNAMIC_TYPES_LIMIT) + throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "'max_types' argument for Dynamic type should be a positive integer between 0 and {}", ColumnDynamic::MAX_DYNAMIC_TYPES_LIMIT); - return std::make_shared(literal->value.get()); + return std::make_shared(literal->value.safeGet()); } void registerDataTypeDynamic(DataTypeFactory & factory) @@ -82,32 +89,118 @@ void registerDataTypeDynamic(DataTypeFactory & factory) factory.registerDataType("Dynamic", create); } +namespace +{ + +/// Split Dynamic subcolumn name into 2 parts: type name and subcolumn of this type. +/// We cannot simply split by '.' because type name can also contain dots. For example: Tuple(`a.b` UInt32). +/// But in all such cases this '.' will be inside back quotes. To split subcolumn name correctly +/// we search for the first '.' that is not inside back quotes. +std::pair splitSubcolumnName(std::string_view subcolumn_name) +{ + bool inside_quotes = false; + const char * pos = subcolumn_name.data(); + const char * end = subcolumn_name.data() + subcolumn_name.size(); + while (true) + { + pos = find_first_symbols<'`', '.', '\\'>(pos, end); + if (pos == end) + break; + + if (*pos == '`') + { + inside_quotes = !inside_quotes; + ++pos; + } + else if (*pos == '\\') + { + ++pos; + } + else if (*pos == '.') + { + if (inside_quotes) + ++pos; + else + break; + } + } + + if (pos == end) + return {subcolumn_name, {}}; + + return {std::string_view(subcolumn_name.data(), pos), std::string_view(pos + 1, end)}; +} + +} + std::unique_ptr DataTypeDynamic::getDynamicSubcolumnData(std::string_view subcolumn_name, const DB::IDataType::SubstreamData & data, bool throw_if_null) const { - auto [subcolumn_type_name, subcolumn_nested_name] = Nested::splitName(subcolumn_name); + auto [type_subcolumn_name, subcolumn_nested_name] = splitSubcolumnName(subcolumn_name); /// Check if requested subcolumn is a valid data type. - auto subcolumn_type = DataTypeFactory::instance().tryGet(String(subcolumn_type_name)); + auto subcolumn_type = DataTypeFactory::instance().tryGet(String(type_subcolumn_name)); if (!subcolumn_type) { if (throw_if_null) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Dynamic type doesn't have subcolumn '{}'", subcolumn_type_name); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Dynamic type doesn't have subcolumn '{}'", type_subcolumn_name); return nullptr; } std::unique_ptr res = std::make_unique(subcolumn_type->getDefaultSerialization()); res->type = subcolumn_type; std::optional discriminator; + ColumnPtr null_map_for_variant_from_shared_variant; if (data.column) { /// If column was provided, we should extract subcolumn from Dynamic column. const auto & dynamic_column = assert_cast(*data.column); const auto & variant_info = dynamic_column.getVariantInfo(); + const auto & variant_column = dynamic_column.getVariantColumn(); + const auto & shared_variant = dynamic_column.getSharedVariant(); /// Check if provided Dynamic column has subcolumn of this type. - auto it = variant_info.variant_name_to_discriminator.find(subcolumn_type->getName()); + String subcolumn_type_name = subcolumn_type->getName(); + auto it = variant_info.variant_name_to_discriminator.find(subcolumn_type_name); if (it != variant_info.variant_name_to_discriminator.end()) { discriminator = it->second; - res->column = dynamic_column.getVariantColumn().getVariantPtrByGlobalDiscriminator(*discriminator); + res->column = variant_column.getVariantPtrByGlobalDiscriminator(*discriminator); + } + /// Otherwise if there is data in shared variant try to find requested type there. + else if (!shared_variant.empty()) + { + /// Create null map for resulting subcolumn to make it Nullable. + auto null_map_column = ColumnUInt8::create(); + NullMap & null_map = assert_cast(*null_map_column).getData(); + null_map.reserve(variant_column.size()); + auto subcolumn = subcolumn_type->createColumn(); + auto shared_variant_local_discr = variant_column.localDiscriminatorByGlobal(dynamic_column.getSharedVariantDiscriminator()); + const auto & local_discriminators = variant_column.getLocalDiscriminators(); + const auto & offsets = variant_column.getOffsets(); + const FormatSettings format_settings; + for (size_t i = 0; i != local_discriminators.size(); ++i) + { + if (local_discriminators[i] == shared_variant_local_discr) + { + auto value = shared_variant.getDataAt(offsets[i]); + ReadBufferFromMemory buf(value.data, value.size); + auto type = decodeDataType(buf); + if (type->getName() == subcolumn_type_name) + { + dynamic_column.getVariantSerialization(subcolumn_type, subcolumn_type_name)->deserializeBinary(*subcolumn, buf, format_settings); + null_map.push_back(0); + } + else + { + null_map.push_back(1); + } + } + else + { + null_map.push_back(1); + } + } + + res->column = std::move(subcolumn); + null_map_for_variant_from_shared_variant = std::move(null_map_column); } } @@ -125,7 +218,7 @@ std::unique_ptr DataTypeDynamic::getDynamicSubcolumnDa return nullptr; } - res->serialization = std::make_shared(res->serialization, subcolumn_type->getName(), is_null_map_subcolumn); + res->serialization = std::make_shared(res->serialization, subcolumn_type->getName(), String(subcolumn_nested_name), is_null_map_subcolumn); /// Make resulting subcolumn Nullable only if type subcolumn can be inside Nullable or can be LowCardinality(Nullable()). bool make_subcolumn_nullable = subcolumn_type->canBeInsideNullable() || subcolumn_type->lowCardinality(); if (!is_null_map_subcolumn && make_subcolumn_nullable) @@ -133,10 +226,10 @@ std::unique_ptr DataTypeDynamic::getDynamicSubcolumnDa if (data.column) { + /// Check if provided Dynamic column has subcolumn of this type. In this case we should use VariantSubcolumnCreator/VariantNullMapSubcolumnCreator to + /// create full subcolumn from variant according to discriminators. if (discriminator) { - /// Provided Dynamic column has subcolumn of this type, we should use VariantSubcolumnCreator/VariantNullMapSubcolumnCreator to - /// create full subcolumn from variant according to discriminators. const auto & variant_column = assert_cast(*data.column).getVariantColumn(); std::unique_ptr creator; if (is_null_map_subcolumn) @@ -154,6 +247,21 @@ std::unique_ptr DataTypeDynamic::getDynamicSubcolumnDa make_subcolumn_nullable); res->column = creator->create(res->column); } + /// Check if requested type was extracted from shared variant. In this case we should use + /// VariantSubcolumnCreator to create full subcolumn from variant according to created null map. + else if (null_map_for_variant_from_shared_variant) + { + if (is_null_map_subcolumn) + { + res->column = null_map_for_variant_from_shared_variant; + } + else + { + SerializationVariantElement::VariantSubcolumnCreator creator( + null_map_for_variant_from_shared_variant, "", 0, 0, make_subcolumn_nullable, null_map_for_variant_from_shared_variant); + res->column = creator.create(res->column); + } + } /// Provided Dynamic column doesn't have subcolumn of this type, just create column filled with default values. else if (is_null_map_subcolumn) { diff --git a/src/DataTypes/DataTypeDynamic.h b/src/DataTypes/DataTypeDynamic.h index d5e4c5261ce..2e7a23d314d 100644 --- a/src/DataTypes/DataTypeDynamic.h +++ b/src/DataTypes/DataTypeDynamic.h @@ -12,6 +12,9 @@ class DataTypeDynamic final : public IDataType public: static constexpr bool is_parametric = true; + /// Don't change this constant, it can break backward compatibility. + static constexpr size_t DEFAULT_MAX_DYNAMIC_TYPES = 32; + explicit DataTypeDynamic(size_t max_dynamic_types_ = DEFAULT_MAX_DYNAMIC_TYPES); TypeIndex getTypeId() const override { return TypeIndex::Dynamic; } @@ -43,8 +46,6 @@ public: size_t getMaxDynamicTypes() const { return max_dynamic_types; } private: - static constexpr size_t DEFAULT_MAX_DYNAMIC_TYPES = 32; - SerializationPtr doGetDefaultSerialization() const override; String doGetName() const override; diff --git a/src/DataTypes/DataTypeEnum.cpp b/src/DataTypes/DataTypeEnum.cpp index 08e0c0d2045..b9a5a1a5a68 100644 --- a/src/DataTypes/DataTypeEnum.cpp +++ b/src/DataTypes/DataTypeEnum.cpp @@ -122,12 +122,12 @@ Field DataTypeEnum::castToName(const Field & value_or_name) const { if (value_or_name.getType() == Field::Types::String) { - this->getValue(value_or_name.get()); /// Check correctness - return value_or_name.get(); + this->getValue(value_or_name.safeGet()); /// Check correctness + return value_or_name.safeGet(); } else if (value_or_name.getType() == Field::Types::Int64) { - Int64 value = value_or_name.get(); + Int64 value = value_or_name.safeGet(); checkOverflow(value); return this->getNameForValue(static_cast(value)).toString(); } @@ -141,12 +141,12 @@ Field DataTypeEnum::castToValue(const Field & value_or_name) const { if (value_or_name.getType() == Field::Types::String) { - return this->getValue(value_or_name.get()); + return this->getValue(value_or_name.safeGet()); } else if (value_or_name.getType() == Field::Types::Int64 || value_or_name.getType() == Field::Types::UInt64) { - Int64 value = value_or_name.get(); + Int64 value = value_or_name.safeGet(); checkOverflow(value); this->getNameForValue(static_cast(value)); /// Check correctness return value; @@ -220,7 +220,7 @@ static void autoAssignNumberForEnum(const ASTPtr & arguments) "Elements of Enum data type must be of form: " "'name' = number or 'name', where name is string literal and number is an integer"); - literal_child_assign_num = value_literal->value.get(); + literal_child_assign_num = value_literal->value.safeGet(); } assign_number_child.emplace_back(child); } @@ -269,8 +269,8 @@ static DataTypePtr createExact(const ASTPtr & arguments) "Elements of Enum data type must be of form: " "'name' = number or 'name', where name is string literal and number is an integer"); - const String & field_name = name_literal->value.get(); - const auto value = value_literal->value.get(); + const String & field_name = name_literal->value.safeGet(); + const auto value = value_literal->value.safeGet(); if (value > std::numeric_limits::max() || value < std::numeric_limits::min()) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Value {} for element '{}' exceeds range of {}", @@ -302,7 +302,7 @@ static DataTypePtr create(const ASTPtr & arguments) "Elements of Enum data type must be of form: " "'name' = number or 'name', where name is string literal and number is an integer"); - Int64 value = value_literal->value.get(); + Int64 value = value_literal->value.safeGet(); if (value > std::numeric_limits::max() || value < std::numeric_limits::min()) return createExact(arguments); diff --git a/src/DataTypes/DataTypeFactory.cpp b/src/DataTypes/DataTypeFactory.cpp index 6f7dcd65b83..107d2d48135 100644 --- a/src/DataTypes/DataTypeFactory.cpp +++ b/src/DataTypes/DataTypeFactory.cpp @@ -150,6 +150,12 @@ DataTypePtr DataTypeFactory::getCustom(DataTypeCustomDescPtr customization) cons return type; } +DataTypePtr DataTypeFactory::getCustom(const String & base_name, DataTypeCustomDescPtr customization) const +{ + auto type = get(base_name); + type->setCustomization(std::move(customization)); + return type; +} void DataTypeFactory::registerDataType(const String & family_name, Value creator, Case case_sensitiveness) { @@ -267,9 +273,10 @@ DataTypeFactory::DataTypeFactory() registerDataTypeDomainSimpleAggregateFunction(*this); registerDataTypeDomainGeo(*this); registerDataTypeMap(*this); - registerDataTypeObject(*this); + registerDataTypeObjectDeprecated(*this); registerDataTypeVariant(*this); registerDataTypeDynamic(*this); + registerDataTypeJSON(*this); } DataTypeFactory & DataTypeFactory::instance() diff --git a/src/DataTypes/DataTypeFactory.h b/src/DataTypes/DataTypeFactory.h index edba9886d1c..7234c53551c 100644 --- a/src/DataTypes/DataTypeFactory.h +++ b/src/DataTypes/DataTypeFactory.h @@ -34,6 +34,7 @@ public: DataTypePtr get(const String & family_name, const ASTPtr & parameters) const; DataTypePtr get(const ASTPtr & ast) const; DataTypePtr getCustom(DataTypeCustomDescPtr customization) const; + DataTypePtr getCustom(const String & base_name, DataTypeCustomDescPtr customization) const; /// Return nullptr in case of error. DataTypePtr tryGet(const String & full_name) const; @@ -98,8 +99,9 @@ void registerDataTypeLowCardinality(DataTypeFactory & factory); void registerDataTypeDomainBool(DataTypeFactory & factory); void registerDataTypeDomainSimpleAggregateFunction(DataTypeFactory & factory); void registerDataTypeDomainGeo(DataTypeFactory & factory); -void registerDataTypeObject(DataTypeFactory & factory); +void registerDataTypeObjectDeprecated(DataTypeFactory & factory); void registerDataTypeVariant(DataTypeFactory & factory); void registerDataTypeDynamic(DataTypeFactory & factory); +void registerDataTypeJSON(DataTypeFactory & factory); } diff --git a/src/DataTypes/DataTypeFixedString.cpp b/src/DataTypes/DataTypeFixedString.cpp index 080ff8826a5..63d5245287f 100644 --- a/src/DataTypes/DataTypeFixedString.cpp +++ b/src/DataTypes/DataTypeFixedString.cpp @@ -51,11 +51,11 @@ static DataTypePtr create(const ASTPtr & arguments) "FixedString data type family must have exactly one argument - size in bytes"); const auto * argument = arguments->children[0]->as(); - if (!argument || argument->value.getType() != Field::Types::UInt64 || argument->value.get() == 0) + if (!argument || argument->value.getType() != Field::Types::UInt64 || argument->value.safeGet() == 0) throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "FixedString data type family must have a number (positive integer) as its argument"); - return std::make_shared(argument->value.get()); + return std::make_shared(argument->value.safeGet()); } diff --git a/src/DataTypes/DataTypeObject.cpp b/src/DataTypes/DataTypeObject.cpp index 91b9bfcb2a5..a56764f4e6e 100644 --- a/src/DataTypes/DataTypeObject.cpp +++ b/src/DataTypes/DataTypeObject.cpp @@ -1,83 +1,525 @@ -#include #include -#include +#include +#include +#include +#include +#include +#include #include #include #include +#include +#include +#include +#include +#include +#include +#include #include +#include "config.h" + +#if USE_SIMDJSON +# include +#elif USE_RAPIDJSON +# include +#else +# include +#endif namespace DB { namespace ErrorCodes { - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int UNEXPECTED_AST_STRUCTURE; + extern const int BAD_ARGUMENTS; + extern const int CANNOT_COMPILE_REGEXP; } -DataTypeObject::DataTypeObject(const String & schema_format_, bool is_nullable_) - : schema_format(Poco::toLower(schema_format_)) - , is_nullable(is_nullable_) +DataTypeObject::DataTypeObject( + const SchemaFormat & schema_format_, + std::unordered_map typed_paths_, + std::unordered_set paths_to_skip_, + std::vector path_regexps_to_skip_, + size_t max_dynamic_paths_, + size_t max_dynamic_types_) + : schema_format(schema_format_) + , typed_paths(std::move(typed_paths_)) + , paths_to_skip(std::move(paths_to_skip_)) + , path_regexps_to_skip(std::move(path_regexps_to_skip_)) + , max_dynamic_paths(max_dynamic_paths_) + , max_dynamic_types(max_dynamic_types_) +{ + /// Check if regular expressions are valid. + for (const auto & regexp_str : path_regexps_to_skip) + { + re2::RE2::Options options; + /// Don't log errors to stderr. + options.set_log_errors(false); + auto regexp = re2::RE2(regexp_str, options); + if (!regexp.ok()) + throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP, "Invalid regexp '{}': {}", regexp_str, regexp.error()); + } + + for (const auto & [typed_path, type] : typed_paths) + { + for (const auto & path_to_skip : paths_to_skip) + { + if (typed_path.starts_with(path_to_skip)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path '{}' is specified with the data type ('{}') and matches the SKIP path prefix '{}'", typed_path, type->getName(), path_to_skip); + } + + for (const auto & path_regex_to_skip : path_regexps_to_skip) + { + if (re2::RE2::FullMatch(typed_path, re2::RE2(path_regex_to_skip))) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path '{}' is specified with the data type ('{}') and matches the SKIP REGEXP '{}'", typed_path, type->getName(), path_regex_to_skip); + } + } +} + +DataTypeObject::DataTypeObject(const DB::DataTypeObject::SchemaFormat & schema_format_, size_t max_dynamic_paths_, size_t max_dynamic_types_) + : schema_format(schema_format_) + , max_dynamic_paths(max_dynamic_paths_) + , max_dynamic_types(max_dynamic_types_) { } bool DataTypeObject::equals(const IDataType & rhs) const { if (const auto * object = typeid_cast(&rhs)) - return schema_format == object->schema_format && is_nullable == object->is_nullable; + { + if (typed_paths.size() != object->typed_paths.size()) + return false; + + for (const auto & [path, type] : typed_paths) + { + auto it = object->typed_paths.find(path); + if (it == object->typed_paths.end()) + return false; + if (!type->equals(*it->second)) + return false; + } + + return schema_format == object->schema_format && paths_to_skip == object->paths_to_skip && path_regexps_to_skip == object->path_regexps_to_skip + && max_dynamic_types == object->max_dynamic_types && max_dynamic_paths == object->max_dynamic_paths; + } + return false; } SerializationPtr DataTypeObject::doGetDefaultSerialization() const { - return getObjectSerialization(schema_format); + std::unordered_map typed_path_serializations; + typed_path_serializations.reserve(typed_paths.size()); + for (const auto & [path, type] : typed_paths) + typed_path_serializations[path] = type->getDefaultSerialization(); + + switch (schema_format) + { + case SchemaFormat::JSON: +#if USE_SIMDJSON + return std::make_shared>( + std::move(typed_path_serializations), + paths_to_skip, + path_regexps_to_skip, + buildJSONExtractTree(getPtr(), "JSON serialization")); +#elif USE_RAPIDJSON + return std::make_shared>( + std::move(typed_path_serializations), + paths_to_skip, + path_regexps_to_skip, + buildJSONExtractTree(getPtr(), "JSON serialization")); +#else + return std::make_shared>( + std::move(typed_path_serializations), + paths_to_skip, + path_regexps_to_skip, + buildJSONExtractTree(getPtr(), "JSON serialization")); +#endif + } } String DataTypeObject::doGetName() const { WriteBufferFromOwnString out; - if (is_nullable) - out << "Object(Nullable(" << quote << schema_format << "))"; - else - out << "Object(" << quote << schema_format << ")"; + out << magic_enum::enum_name(schema_format); + bool first = true; + auto write_separator = [&]() + { + if (!first) + { + out << ", "; + } + else + { + out << "("; + first = false; + } + }; + + if (max_dynamic_types != DataTypeDynamic::DEFAULT_MAX_DYNAMIC_TYPES) + { + write_separator(); + out << "max_dynamic_types=" << max_dynamic_types; + } + + if (max_dynamic_paths != DEFAULT_MAX_SEPARATELY_STORED_PATHS) + { + write_separator(); + out << "max_dynamic_paths=" << max_dynamic_paths; + } + + std::vector sorted_typed_paths; + sorted_typed_paths.reserve(typed_paths.size()); + for (const auto & [path, _] : typed_paths) + sorted_typed_paths.push_back(path); + std::sort(sorted_typed_paths.begin(), sorted_typed_paths.end()); + for (const auto & path : sorted_typed_paths) + { + write_separator(); + out << backQuoteIfNeed(path) << " " << typed_paths.at(path)->getName(); + } + + std::vector sorted_skip_paths; + sorted_skip_paths.reserve(paths_to_skip.size()); + for (const auto & skip_path : paths_to_skip) + sorted_skip_paths.push_back(skip_path); + std::sort(sorted_skip_paths.begin(), sorted_skip_paths.end()); + for (const auto & skip_path : sorted_skip_paths) + { + write_separator(); + out << "SKIP " << backQuoteIfNeed(skip_path); + } + + for (const auto & skip_regexp : path_regexps_to_skip) + { + write_separator(); + out << "SKIP REGEXP " << quoteString(skip_regexp); + } + + if (!first) + out << ")"; + return out.str(); } -static DataTypePtr create(const ASTPtr & arguments) +MutableColumnPtr DataTypeObject::createColumn() const { - if (!arguments || arguments->children.size() != 1) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Object data type family must have one argument - name of schema format"); + std::unordered_map typed_path_columns; + typed_path_columns.reserve(typed_paths.size()); + for (const auto & [path, type] : typed_paths) + typed_path_columns[path] = type->createColumn(); - ASTPtr schema_argument = arguments->children[0]; - bool is_nullable = false; + return ColumnObject::create(std::move(typed_path_columns), max_dynamic_paths, max_dynamic_types); +} - if (const auto * type = schema_argument->as()) +namespace +{ + +/// It is possible to have nested JSON object inside Dynamic. For example when we have an array of JSON objects. +/// During type inference in parsing in case of creating nested JSON objects, we reduce max_dynamic_paths/max_dynamic_types by factors +/// NESTED_OBJECT_MAX_DYNAMIC_PATHS_REDUCE_FACTOR/NESTED_OBJECT_MAX_DYNAMIC_TYPES_REDUCE_FACTOR. +/// So the type name will actually be JSON(max_dynamic_paths=N, max_dynamic_types=M). But we want the user to be able to query it +/// using json.array.:`Array(JSON)`.some.path without specifying max_dynamic_paths/max_dynamic_types. +/// To support it, we do a trick - we replace JSON name in subcolumn to JSON(max_dynamic_paths=N, max_dynamic_types=M), because we know +/// the exact values of max_dynamic_paths/max_dynamic_types for it. +void replaceJSONTypeNameIfNeeded(String & type_name, size_t max_dynamic_paths, size_t max_dynamic_types) +{ + auto pos = type_name.find("JSON"); + while (pos != String::npos) { - if (type->name != "Nullable" || type->arguments->children.size() != 1) - throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, - "Expected 'Nullable()' as parameter for type Object (function: {})", type->name); + /// Replace only if we don't already have parameters in JSON type declaration. + if (pos + 4 == type_name.size() || type_name[pos + 4] != '(') + type_name.replace( + pos, + 4, + fmt::format( + "JSON(max_dynamic_paths={}, max_dynamic_types={})", + max_dynamic_paths / DataTypeObject::NESTED_OBJECT_MAX_DYNAMIC_PATHS_REDUCE_FACTOR, + max_dynamic_types / DataTypeObject::NESTED_OBJECT_MAX_DYNAMIC_TYPES_REDUCE_FACTOR)); + pos = type_name.find("JSON", pos + 4); + } +} - schema_argument = type->arguments->children[0]; - is_nullable = true; +/// JSON subcolumn name with Dynamic type subcolumn looks like this: +/// "json.some.path.:`Type_name`.some.subcolumn". +/// We back quoted type name during identifier parsing so we can distinguish type subcolumn and path element ":TypeName". +std::pair splitPathAndDynamicTypeSubcolumn(std::string_view subcolumn_name, size_t max_dynamic_paths, size_t max_dynamic_types) +{ + /// Try to find dynamic type subcolumn in a form .:`Type`. + auto pos = subcolumn_name.find(".:`"); + if (pos == std::string_view::npos) + return {String(subcolumn_name), ""}; + + ReadBufferFromMemory buf(subcolumn_name.substr(pos + 2)); + String dynamic_subcolumn; + /// Try to read back quoted type name. + if (!tryReadBackQuotedString(dynamic_subcolumn, buf)) + return {String(subcolumn_name), ""}; + + replaceJSONTypeNameIfNeeded(dynamic_subcolumn, max_dynamic_paths, max_dynamic_types); + + /// If there is more data in the buffer - it's subcolumn of a type, append it to the type name. + if (!buf.eof()) + dynamic_subcolumn += String(buf.position(), buf.available()); + + return {String(subcolumn_name.substr(0, pos)), dynamic_subcolumn}; +} + +/// Sub-object subcolumn in JSON path always looks like "^`some`.path.path". +/// We back quote first path element after `^` so we can distinguish sub-object subcolumn and path element "^path". +std::optional tryGetSubObjectSubcolumn(std::string_view subcolumn_name) +{ + if (!subcolumn_name.starts_with("^`")) + return std::nullopt; + + ReadBufferFromMemory buf(subcolumn_name.data() + 1); + String path; + /// Try to read back-quoted first path element. + if (!tryReadBackQuotedString(path, buf)) + return std::nullopt; + + /// Add remaining path elements if any. + return path + String(buf.position(), buf.available()); +} + +/// Return sub-path by specified prefix. +/// For example, for prefix a.b: +/// a.b.c.d -> c.d, a.b.c -> c +String getSubPath(const String & path, const String & prefix) +{ + return path.substr(prefix.size() + 1); +} + +std::string_view getSubPath(std::string_view path, const String & prefix) +{ + return path.substr(prefix.size() + 1); +} + +} + +std::unique_ptr DataTypeObject::getDynamicSubcolumnData(std::string_view subcolumn_name, const SubstreamData & data, bool throw_if_null) const +{ + /// Check if it's sub-object subcolumn. + /// In this case we should return JSON column with all paths that are inside specified object prefix. + /// For example, if we have {"a" : {"b" : {"c" : {"d" : 10, "e" : "Hello"}, "f" : [1, 2, 3]}}} and subcolumn ^a.b + /// we should return JSON column with data {"c" : {"d" : 10, "e" : Hello}, "f" : [1, 2, 3]} + if (auto sub_object_subcolumn = tryGetSubObjectSubcolumn(subcolumn_name)) + { + const String & prefix = *sub_object_subcolumn; + /// Collect new typed paths. + std::unordered_map typed_sub_paths; + /// Collect serializations for typed paths. They will be needed for sub-object subcolumn deserialization. + std::unordered_map typed_paths_serializations; + for (const auto & [path, type] : typed_paths) + { + if (path.starts_with(prefix) && path.size() != prefix.size()) + { + typed_sub_paths[getSubPath(path, prefix)] = type; + typed_paths_serializations[path] = type->getDefaultSerialization(); + } + } + + std::unique_ptr res = std::make_unique(std::make_shared(prefix, typed_paths_serializations)); + /// Keep all current constraints like limits and skip paths/prefixes/regexps. + res->type = std::make_shared(schema_format, typed_sub_paths, paths_to_skip, path_regexps_to_skip, max_dynamic_paths, max_dynamic_types); + /// If column was provided, we should create a column for the requested subcolumn. + if (data.column) + { + const auto & object_column = assert_cast(*data.column); + + auto result_column = res->type->createColumn(); + auto & result_object_column = assert_cast(*result_column); + + /// Iterate over all typed/dynamic/shared data paths and collect all paths with specified prefix. + auto & result_typed_columns = result_object_column.getTypedPaths(); + for (const auto & [path, column] : object_column.getTypedPaths()) + { + if (path.starts_with(prefix) && path.size() != prefix.size()) + result_typed_columns[getSubPath(path, prefix)] = column; + } + + auto & result_dynamic_columns = result_object_column.getDynamicPaths(); + auto & result_dynamic_columns_ptrs = result_object_column.getDynamicPathsPtrs(); + for (const auto & [path, column] : object_column.getDynamicPaths()) + { + if (path.starts_with(prefix) && path.size() != prefix.size()) + { + auto sub_path = getSubPath(path, prefix); + result_dynamic_columns[sub_path] = column; + result_dynamic_columns_ptrs[sub_path] = assert_cast(result_dynamic_columns[sub_path].get()); + } + } + + const auto & shared_data_offsets = object_column.getSharedDataOffsets(); + const auto [shared_data_paths, shared_data_values] = object_column.getSharedDataPathsAndValues(); + auto & result_shared_data_offsets = result_object_column.getSharedDataOffsets(); + result_shared_data_offsets.reserve(shared_data_offsets.size()); + auto [result_shared_data_paths, result_shared_data_values] = result_object_column.getSharedDataPathsAndValues(); + for (size_t i = 0; i != shared_data_offsets.size(); ++i) + { + size_t start = shared_data_offsets[ssize_t(i) - 1]; + size_t end = shared_data_offsets[ssize_t(i)]; + size_t lower_bound_index = ColumnObject::findPathLowerBoundInSharedData(prefix, *shared_data_paths, start, end); + for (; lower_bound_index != end; ++lower_bound_index) + { + auto path = shared_data_paths->getDataAt(lower_bound_index).toView(); + if (!path.starts_with(prefix)) + break; + + /// Don't include path that is equal to the prefix. + if (path.size() != prefix.size()) + { + auto sub_path = getSubPath(path, prefix); + result_shared_data_paths->insertData(sub_path.data(), sub_path.size()); + result_shared_data_values->insertFrom(*shared_data_values, lower_bound_index); + } + } + result_shared_data_offsets.push_back(result_shared_data_paths->size()); + } + + res->column = std::move(result_column); + } + + return res; } - const auto * literal = schema_argument->as(); - if (!literal || literal->value.getType() != Field::Types::String) - throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, - "Object data type family must have a const string as its schema name parameter"); + /// Split requested subcolumn to the JSON path and Dynamic type subcolumn. + auto [path, path_subcolumn] = splitPathAndDynamicTypeSubcolumn(subcolumn_name, max_dynamic_paths, max_dynamic_types); + std::unique_ptr res; + if (auto it = typed_paths.find(path); it != typed_paths.end()) + { + res = std::make_unique(it->second->getDefaultSerialization()); + res->type = it->second; + } + else + { + res = std::make_unique(std::make_shared()); + res->type = std::make_shared(); + } - return std::make_shared(literal->value.get(), is_nullable); + /// If column was provided, we should create a column for requested subcolumn. + if (data.column) + { + const auto & object_column = assert_cast(*data.column); + /// Try to find requested path in typed paths. + if (auto typed_it = object_column.getTypedPaths().find(path); typed_it != object_column.getTypedPaths().end()) + { + res->column = typed_it->second; + } + /// Try to find requested path in dynamic paths. + else if (auto dynamic_it = object_column.getDynamicPaths().find(path); dynamic_it != object_column.getDynamicPaths().end()) + { + res->column = dynamic_it->second; + } + /// Extract values of requested path from shared data. + else + { + auto dynamic_column = ColumnDynamic::create(max_dynamic_types); + dynamic_column->reserve(object_column.size()); + ColumnObject::fillPathColumnFromSharedData(*dynamic_column, path, object_column.getSharedDataPtr(), 0, object_column.size()); + res->column = std::move(dynamic_column); + } + } + + /// Get subcolumn for Dynamic type if needed. + if (!path_subcolumn.empty()) + { + res = res->type->getSubcolumnData(path_subcolumn, *res, throw_if_null); + if (!res) + return nullptr; + } + + if (typed_paths.contains(path)) + res->serialization = std::make_shared(res->serialization, path); + else + res->serialization = std::make_shared(res->serialization, path, path_subcolumn, max_dynamic_types); + + return res; } -void registerDataTypeObject(DataTypeFactory & factory) +static DataTypePtr createObject(const ASTPtr & arguments, const DataTypeObject::SchemaFormat & schema_format) { - factory.registerDataType("Object", create); - factory.registerSimpleDataType("JSON", - [] { return std::make_shared("JSON", false); }, - DataTypeFactory::Case::Insensitive); + if (!arguments || arguments->children.empty()) + return std::make_shared(schema_format); + + std::unordered_map typed_paths; + std::unordered_set paths_to_skip; + std::vector path_regexps_to_skip; + + size_t max_dynamic_types = DataTypeDynamic::DEFAULT_MAX_DYNAMIC_TYPES; + size_t max_dynamic_paths = DataTypeObject::DEFAULT_MAX_SEPARATELY_STORED_PATHS; + + for (const auto & argument : arguments->children) + { + const auto * object_type_argument = argument->as(); + if (object_type_argument->parameter) + { + const auto * function = object_type_argument->parameter->as(); + + if (!function || function->name != "equals") + throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "Unexpected parameter in {} type arguments: {}", magic_enum::enum_name(schema_format), function->formatForErrorMessage()); + + const auto * identifier = function->arguments->children[0]->as(); + if (!identifier) + throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "Unexpected {} type argument: {}. Expected expression 'max_dynamic_types=N' or 'max_dynamic_paths=N'", magic_enum::enum_name(schema_format), function->formatForErrorMessage()); + + auto identifier_name = identifier->name(); + if (identifier_name != "max_dynamic_types" && identifier_name != "max_dynamic_paths") + throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "Unexpected parameter in {} type arguments: {}. Expected 'max_dynamic_types' or `max_dynamic_paths`", magic_enum::enum_name(schema_format), identifier_name); + + auto * literal = function->arguments->children[1]->as(); + /// Is 1000000 a good maximum for max paths? + size_t max_value = identifier_name == "max_dynamic_types" ? ColumnDynamic::MAX_DYNAMIC_TYPES_LIMIT : 1000000; + if (!literal || literal->value.getType() != Field::Types::UInt64 || literal->value.safeGet() > max_value) + throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "'{}' parameter for {} type should be a positive integer between 0 and {}. Got {}", identifier_name, magic_enum::enum_name(schema_format), max_value, function->arguments->children[1]->formatForErrorMessage()); + + if (identifier_name == "max_dynamic_types") + max_dynamic_types = literal->value.safeGet(); + else + max_dynamic_paths = literal->value.safeGet(); + } + else if (object_type_argument->path_with_type) + { + const auto * path_with_type = object_type_argument->path_with_type->as(); + auto data_type = DataTypeFactory::instance().get(path_with_type->type); + if (typed_paths.contains(path_with_type->name)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Found duplicated path with type: {}", path_with_type->name); + typed_paths.emplace(path_with_type->name, data_type); + } + else if (object_type_argument->skip_path) + { + const auto * identifier = object_type_argument->skip_path->as(); + if (!identifier) + throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "Unexpected AST in SKIP section of {} type arguments: {}. Expected identifier with path name", magic_enum::enum_name(schema_format), object_type_argument->skip_path->formatForErrorMessage()); + + paths_to_skip.insert(identifier->name()); + } + else if (object_type_argument->skip_path_regexp) + { + const auto * literal = object_type_argument->skip_path_regexp->as(); + if (!literal || literal->value.getType() != Field::Types::String) + throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "Unexpected AST in SKIP section of {} type arguments: {}. Expected identifier with path name", magic_enum::enum_name(schema_format), object_type_argument->skip_path->formatForErrorMessage()); + + path_regexps_to_skip.push_back(literal->value.safeGet()); + } + } + + std::sort(path_regexps_to_skip.begin(), path_regexps_to_skip.end()); + return std::make_shared(schema_format, std::move(typed_paths), std::move(paths_to_skip), std::move(path_regexps_to_skip), max_dynamic_paths, max_dynamic_types); +} + +static DataTypePtr createJSON(const ASTPtr & arguments) +{ + return createObject(arguments, DataTypeObject::SchemaFormat::JSON); +} + +void registerDataTypeJSON(DataTypeFactory & factory) +{ + if (!Context::getGlobalContextInstance()->getSettingsRef().use_json_alias_for_old_object_type) + factory.registerDataType("JSON", createJSON, DataTypeFactory::Case::Insensitive); } } diff --git a/src/DataTypes/DataTypeObject.h b/src/DataTypes/DataTypeObject.h index c610a1a8ba4..7eb2e7729de 100644 --- a/src/DataTypes/DataTypeObject.h +++ b/src/DataTypes/DataTypeObject.h @@ -1,48 +1,80 @@ #pragma once #include +#include #include -#include +#include +#include namespace DB { -namespace ErrorCodes -{ - extern const int NOT_IMPLEMENTED; -} - class DataTypeObject : public IDataType { -private: - String schema_format; - bool is_nullable; - public: - DataTypeObject(const String & schema_format_, bool is_nullable_); + enum class SchemaFormat + { + JSON = 0, + }; + + /// Don't change these constants, it can break backward compatibility. + static constexpr size_t DEFAULT_MAX_SEPARATELY_STORED_PATHS = 1024; + static constexpr size_t NESTED_OBJECT_MAX_DYNAMIC_PATHS_REDUCE_FACTOR = 4; + static constexpr size_t NESTED_OBJECT_MAX_DYNAMIC_TYPES_REDUCE_FACTOR = 2; + + explicit DataTypeObject( + const SchemaFormat & schema_format_, + std::unordered_map typed_paths_ = {}, + std::unordered_set paths_to_skip_ = {}, + std::vector path_regexps_to_skip_ = {}, + size_t max_dynamic_paths_ = DEFAULT_MAX_SEPARATELY_STORED_PATHS, + size_t max_dynamic_types_ = DataTypeDynamic::DEFAULT_MAX_DYNAMIC_TYPES); + + DataTypeObject(const SchemaFormat & schema_format_, size_t max_dynamic_paths_, size_t max_dynamic_types_); const char * getFamilyName() const override { return "Object"; } String doGetName() const override; TypeIndex getTypeId() const override { return TypeIndex::Object; } - MutableColumnPtr createColumn() const override { return ColumnObject::create(is_nullable); } + MutableColumnPtr createColumn() const override; - Field getDefault() const override - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getDefault() is not implemented for data type {}", getName()); - } + Field getDefault() const override { return Object(); } - bool haveSubtypes() const override { return false; } - bool equals(const IDataType & rhs) const override; bool isParametric() const override { return true; } - bool hasDynamicSubcolumnsDeprecated() const override { return true; } + bool canBeInsideNullable() const override { return false; } + bool supportsSparseSerialization() const override { return false; } + bool canBeInsideSparseColumns() const override { return false; } + bool isComparable() const override { return false; } + bool haveSubtypes() const override { return false; } + + bool equals(const IDataType & rhs) const override; + + bool hasDynamicSubcolumnsData() const override { return true; } + std::unique_ptr getDynamicSubcolumnData(std::string_view subcolumn_name, const SubstreamData & data, bool throw_if_null) const override; SerializationPtr doGetDefaultSerialization() const override; - bool hasNullableSubcolumns() const { return is_nullable; } + const SchemaFormat & getSchemaFormat() const { return schema_format; } + const std::unordered_map & getTypedPaths() const { return typed_paths; } + const std::unordered_set & getPathsToSkip() const { return paths_to_skip; } + const std::vector & getPathRegexpsToSkip() const { return path_regexps_to_skip; } - const String & getSchemaFormat() const { return schema_format; } + size_t getMaxDynamicTypes() const { return max_dynamic_types; } + size_t getMaxDynamicPaths() const { return max_dynamic_paths; } + +private: + SchemaFormat schema_format; + /// Set of paths with types that were specified in type declaration. + std::unordered_map typed_paths; + /// Set of paths that should be skipped during data parsing. + std::unordered_set paths_to_skip; + /// List of regular expressions that should be used to skip paths during data parsing. + std::vector path_regexps_to_skip; + /// Limit on the number of paths that can be stored as subcolumn. + size_t max_dynamic_paths; + /// Limit of dynamic types that should be used for Dynamic columns. + size_t max_dynamic_types; }; } diff --git a/src/DataTypes/DataTypeObjectDeprecated.cpp b/src/DataTypes/DataTypeObjectDeprecated.cpp new file mode 100644 index 00000000000..07f9c116e58 --- /dev/null +++ b/src/DataTypes/DataTypeObjectDeprecated.cpp @@ -0,0 +1,87 @@ +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int UNEXPECTED_AST_STRUCTURE; +} + +DataTypeObjectDeprecated::DataTypeObjectDeprecated(const String & schema_format_, bool is_nullable_) + : schema_format(Poco::toLower(schema_format_)) + , is_nullable(is_nullable_) +{ +} + +bool DataTypeObjectDeprecated::equals(const IDataType & rhs) const +{ + if (const auto * object = typeid_cast(&rhs)) + return schema_format == object->schema_format && is_nullable == object->is_nullable; + return false; +} + +SerializationPtr DataTypeObjectDeprecated::doGetDefaultSerialization() const +{ + return getObjectSerialization(schema_format); +} + +String DataTypeObjectDeprecated::doGetName() const +{ + WriteBufferFromOwnString out; + if (is_nullable) + out << "Object(Nullable(" << quote << schema_format << "))"; + else + out << "Object(" << quote << schema_format << ")"; + return out.str(); +} + +static DataTypePtr create(const ASTPtr & arguments) +{ + if (!arguments || arguments->children.size() != 1) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Object data type family must have one argument - name of schema format"); + + ASTPtr schema_argument = arguments->children[0]; + bool is_nullable = false; + + if (const auto * type = schema_argument->as()) + { + if (type->name != "Nullable" || type->arguments->children.size() != 1) + throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, + "Expected 'Nullable()' as parameter for type Object (function: {})", type->name); + + schema_argument = type->arguments->children[0]; + is_nullable = true; + } + + const auto * literal = schema_argument->as(); + if (!literal || literal->value.getType() != Field::Types::String) + throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, + "Object data type family must have a const string as its schema name parameter"); + + return std::make_shared(literal->value.safeGet(), is_nullable); +} + +void registerDataTypeObjectDeprecated(DataTypeFactory & factory) +{ + factory.registerDataType("Object", create); + if (Context::getGlobalContextInstance()->getSettingsRef().use_json_alias_for_old_object_type) + factory.registerSimpleDataType("JSON", + [] { return std::make_shared("JSON", false); }, + DataTypeFactory::Case::Insensitive); +} + +} diff --git a/src/DataTypes/DataTypeObjectDeprecated.h b/src/DataTypes/DataTypeObjectDeprecated.h new file mode 100644 index 00000000000..e1f81caaa4f --- /dev/null +++ b/src/DataTypes/DataTypeObjectDeprecated.h @@ -0,0 +1,48 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + +class DataTypeObjectDeprecated : public IDataType +{ +private: + String schema_format; + bool is_nullable; + +public: + DataTypeObjectDeprecated(const String & schema_format_, bool is_nullable_); + + const char * getFamilyName() const override { return "Object"; } + String doGetName() const override; + TypeIndex getTypeId() const override { return TypeIndex::ObjectDeprecated; } + + MutableColumnPtr createColumn() const override { return ColumnObjectDeprecated::create(is_nullable); } + + Field getDefault() const override + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getDefault() is not implemented for data type {}", getName()); + } + + bool haveSubtypes() const override { return false; } + bool equals(const IDataType & rhs) const override; + bool isParametric() const override { return true; } + bool hasDynamicSubcolumnsDeprecated() const override { return true; } + + SerializationPtr doGetDefaultSerialization() const override; + + bool hasNullableSubcolumns() const { return is_nullable; } + + const String & getSchemaFormat() const { return schema_format; } +}; + +} diff --git a/src/DataTypes/DataTypeTuple.cpp b/src/DataTypes/DataTypeTuple.cpp index e96937d522d..75556ed4090 100644 --- a/src/DataTypes/DataTypeTuple.cpp +++ b/src/DataTypes/DataTypeTuple.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include @@ -192,17 +192,12 @@ MutableColumnPtr DataTypeTuple::createColumn() const MutableColumnPtr DataTypeTuple::createColumn(const ISerialization & serialization) const { - /// If we read Tuple as Variant subcolumn, it may be wrapped to SerializationVariantElement. - /// Here we don't need it, so we drop this wrapper. - const auto * current_serialization = &serialization; - while (const auto * serialization_variant_element = typeid_cast(current_serialization)) - current_serialization = serialization_variant_element->getNested().get(); - - /// If we read subcolumn of nested Tuple, it may be wrapped to SerializationNamed + /// If we read subcolumn of nested Tuple or this Tuple is a subcolumn, it may be wrapped to SerializationWrapper /// several times to allow to reconstruct the substream path name. /// Here we don't need substream path name, so we drop first several wrapper serializations. - while (const auto * serialization_named = typeid_cast(current_serialization)) - current_serialization = serialization_named->getNested().get(); + const auto * current_serialization = &serialization; + while (const auto * serialization_wrapper = dynamic_cast(current_serialization)) + current_serialization = serialization_wrapper->getNested().get(); const auto * serialization_tuple = typeid_cast(current_serialization); if (!serialization_tuple) diff --git a/src/DataTypes/DataTypeVariant.cpp b/src/DataTypes/DataTypeVariant.cpp index 8a10ca7d06d..cc8d04e94da 100644 --- a/src/DataTypes/DataTypeVariant.cpp +++ b/src/DataTypes/DataTypeVariant.cpp @@ -117,7 +117,7 @@ bool DataTypeVariant::equals(const IDataType & rhs) const /// The same data types with different custom names considered different. /// For example, UInt8 and Bool. - if ((variants[i]->hasCustomName() || rhs_variant.variants[i]) && variants[i]->getName() != rhs_variant.variants[i]->getName()) + if ((variants[i]->hasCustomName() || rhs_variant.variants[i]->hasCustomName()) && variants[i]->getName() != rhs_variant.variants[i]->getName()) return false; } diff --git a/src/DataTypes/DataTypesBinaryEncoding.cpp b/src/DataTypes/DataTypesBinaryEncoding.cpp index bd994e313ba..dc0f2f3f5aa 100644 --- a/src/DataTypes/DataTypesBinaryEncoding.cpp +++ b/src/DataTypes/DataTypesBinaryEncoding.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -94,8 +95,13 @@ enum class BinaryTypeIndex : uint8_t Bool = 0x2D, SimpleAggregateFunction = 0x2E, Nested = 0x2F, + JSON = 0x30, }; +/// In future we can introduce more arguments in the JSON data type definition. +/// To support such changes, use versioning in the serialization of JSON type. +const UInt8 TYPE_JSON_SERIALIZATION_VERSION = 0; + BinaryTypeIndex getBinaryTypeIndex(const DataTypePtr & type) { /// By default custom types don't have their own BinaryTypeIndex. @@ -202,7 +208,7 @@ BinaryTypeIndex getBinaryTypeIndex(const DataTypePtr & type) return BinaryTypeIndex::LowCardinality; case TypeIndex::Map: return BinaryTypeIndex::Map; - case TypeIndex::Object: + case TypeIndex::ObjectDeprecated: /// Object type will be deprecated and replaced by new implementation. No need to support it here. throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Binary encoding of type Object is not supported"); case TypeIndex::IPv4: @@ -216,6 +222,15 @@ BinaryTypeIndex getBinaryTypeIndex(const DataTypePtr & type) /// JSONPaths is used only during schema inference and cannot be used anywhere else. case TypeIndex::JSONPaths: throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Binary encoding of type JSONPaths is not supported"); + case TypeIndex::Object: + { + const auto & object_type = assert_cast(*type); + switch (object_type.getSchemaFormat()) + { + case DataTypeObject::SchemaFormat::JSON: + return BinaryTypeIndex::JSON; + } + } } } @@ -444,7 +459,7 @@ void encodeDataType(const DataTypePtr & type, WriteBuffer & buf) case BinaryTypeIndex::Dynamic: { const auto & dynamic_type = assert_cast(*type); - /// Maximum number of dynamic types is 255, we can write it as 1 byte. + /// Maximum number of dynamic types is 254, we can write it as 1 byte. writeBinary(UInt8(dynamic_type.getMaxDynamicTypes()), buf); break; } @@ -480,6 +495,30 @@ void encodeDataType(const DataTypePtr & type, WriteBuffer & buf) writeStringBinary(type_name, buf); break; } + case BinaryTypeIndex::JSON: + { + const auto & object_type = assert_cast(*type); + /// Write version of the serialization because we can add new arguments in the JSON type. + writeBinary(TYPE_JSON_SERIALIZATION_VERSION, buf); + writeVarUInt(object_type.getMaxDynamicPaths(), buf); + writeBinary(UInt8(object_type.getMaxDynamicTypes()), buf); + const auto & typed_paths = object_type.getTypedPaths(); + writeVarUInt(typed_paths.size(), buf); + for (const auto & [path, path_type] : typed_paths) + { + writeStringBinary(path, buf); + encodeDataType(path_type, buf); + } + const auto & paths_to_skip = object_type.getPathsToSkip(); + writeVarUInt(paths_to_skip.size(), buf); + for (const auto & path : paths_to_skip) + writeStringBinary(path, buf); + const auto & path_regexps_to_skip = object_type.getPathRegexpsToSkip(); + writeVarUInt(path_regexps_to_skip.size(), buf); + for (const auto & regexp : path_regexps_to_skip) + writeStringBinary(regexp, buf); + break; + } default: break; } @@ -691,6 +730,54 @@ DataTypePtr decodeDataType(ReadBuffer & buf) readStringBinary(type_name, buf); return DataTypeFactory::instance().get(type_name); } + case BinaryTypeIndex::JSON: + { + UInt8 serialization_version; + readBinary(serialization_version, buf); + if (serialization_version > TYPE_JSON_SERIALIZATION_VERSION) + throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected version of JSON type binary encoding"); + size_t max_dynamic_paths; + readVarUInt(max_dynamic_paths, buf); + UInt8 max_dynamic_types; + readBinary(max_dynamic_types, buf); + size_t typed_paths_size; + readVarUInt(typed_paths_size, buf); + std::unordered_map typed_paths; + for (size_t i = 0; i != typed_paths_size; ++i) + { + String path; + readStringBinary(path, buf); + typed_paths[path] = decodeDataType(buf); + } + size_t paths_to_skip_size; + readVarUInt(paths_to_skip_size, buf); + std::unordered_set paths_to_skip; + paths_to_skip.reserve(paths_to_skip_size); + for (size_t i = 0; i != paths_to_skip_size; ++i) + { + String path; + readStringBinary(path, buf); + paths_to_skip.insert(path); + } + + size_t path_regexps_to_skip_size; + readVarUInt(path_regexps_to_skip_size, buf); + std::vector path_regexps_to_skip; + path_regexps_to_skip.reserve(path_regexps_to_skip_size); + for (size_t i = 0; i != path_regexps_to_skip_size; ++i) + { + String regexp; + readStringBinary(regexp, buf); + path_regexps_to_skip.push_back(regexp); + } + return std::make_shared( + DataTypeObject::SchemaFormat::JSON, + typed_paths, + paths_to_skip, + path_regexps_to_skip, + max_dynamic_paths, + max_dynamic_types); + } } throw Exception(ErrorCodes::INCORRECT_DATA, "Unknown type code: {0:#04x}", UInt64(type)); diff --git a/src/DataTypes/DataTypesBinaryEncoding.h b/src/DataTypes/DataTypesBinaryEncoding.h index d02e7f85942..cdfbfee1ccf 100644 --- a/src/DataTypes/DataTypesBinaryEncoding.h +++ b/src/DataTypes/DataTypesBinaryEncoding.h @@ -8,58 +8,59 @@ namespace DB /** Binary encoding for ClickHouse data types: -|------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| ClickHouse data type | Binary encoding | -|------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| Nothing | 0x00 | -| UInt8 | 0x01 | -| UInt16 | 0x02 | -| UInt32 | 0x03 | -| UInt64 | 0x04 | -| UInt128 | 0x05 | -| UInt256 | 0x06 | -| Int8 | 0x07 | -| Int16 | 0x08 | -| Int32 | 0x09 | -| Int64 | 0x0A | -| Int128 | 0x0B | -| Int256 | 0x0C | -| Float32 | 0x0D | -| Float64 | 0x0E | -| Date | 0x0F | -| Date32 | 0x10 | -| DateTime | 0x11 | -| DateTime(time_zone) | 0x12 | -| DateTime64(P) | 0x13 | -| DateTime64(P, time_zone) | 0x14 | -| String | 0x15 | -| FixedString(N) | 0x16 | -| Enum8 | 0x17... | -| Enum16 | 0x18...> | -| Decimal32(P, S) | 0x19 | -| Decimal64(P, S) | 0x1A | -| Decimal128(P, S) | 0x1B | -| Decimal256(P, S) | 0x1C | -| UUID | 0x1D | -| Array(T) | 0x1E | -| Tuple(T1, ..., TN) | 0x1F... | -| Tuple(name1 T1, ..., nameN TN) | 0x20... | -| Set | 0x21 | -| Interval | 0x22 | -| Nullable(T) | 0x23 | -| Function | 0x24... | -| AggregateFunction(function_name(param_1, ..., param_N), arg_T1, ..., arg_TN) | 0x25...... | -| LowCardinality(T) | 0x26 | -| Map(K, V) | 0x27 | -| IPv4 | 0x28 | -| IPv6 | 0x29 | -| Variant(T1, ..., TN) | 0x2A... | -| Dynamic(max_types=N) | 0x2B | -| Custom type (Ring, Polygon, etc) | 0x2C | -| Bool | 0x2D | -| SimpleAggregateFunction(function_name(param_1, ..., param_N), arg_T1, ..., arg_TN) | 0x2E...... | -| Nested(name1 T1, ..., nameN TN) | 0x2F... | -|------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +|---------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| ClickHouse data type | Binary encoding | +|---------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Nothing | 0x00 | +| UInt8 | 0x01 | +| UInt16 | 0x02 | +| UInt32 | 0x03 | +| UInt64 | 0x04 | +| UInt128 | 0x05 | +| UInt256 | 0x06 | +| Int8 | 0x07 | +| Int16 | 0x08 | +| Int32 | 0x09 | +| Int64 | 0x0A | +| Int128 | 0x0B | +| Int256 | 0x0C | +| Float32 | 0x0D | +| Float64 | 0x0E | +| Date | 0x0F | +| Date32 | 0x10 | +| DateTime | 0x11 | +| DateTime(time_zone) | 0x12 | +| DateTime64(P) | 0x13 | +| DateTime64(P, time_zone) | 0x14 | +| String | 0x15 | +| FixedString(N) | 0x16 | +| Enum8 | 0x17... | +| Enum16 | 0x18...> | +| Decimal32(P, S) | 0x19 | +| Decimal64(P, S) | 0x1A | +| Decimal128(P, S) | 0x1B | +| Decimal256(P, S) | 0x1C | +| UUID | 0x1D | +| Array(T) | 0x1E | +| Tuple(T1, ..., TN) | 0x1F... | +| Tuple(name1 T1, ..., nameN TN) | 0x20... | +| Set| 0x21 | +| Interval | 0x22 | +| Nullable(T) | 0x23 | +| Function | 0x24... | +| AggregateFunction(function_name(param_1, ..., param_N), arg_T1, ..., arg_TN) | 0x25...... | +| LowCardinality(T) | 0x26 | +| Map(K, V) | 0x27 | +| IPv4 | 0x28 | +| IPv6 | 0x29 | +| Variant(T1, ..., TN) | 0x2A... | +| Dynamic(max_types=N) | 0x2B | +| Custom type (Ring, Polygon, etc) | 0x2C | +| Bool | 0x2D | +| SimpleAggregateFunction(function_name(param_1, ..., param_N), arg_T1, ..., arg_TN) | 0x2E...... | +| Nested(name1 T1, ..., nameN TN) | 0x2F... | +| JSON(max_dynamic_paths=N, max_dynamic_types=M, path Type, SKIP skip_path, SKIP REGEXP skip_path_regexp) | 0x30......... | +|---------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| Interval kind binary encoding: |---------------|-----------------| diff --git a/src/DataTypes/DataTypesDecimal.cpp b/src/DataTypes/DataTypesDecimal.cpp index a427fd0717a..1d8f7711de1 100644 --- a/src/DataTypes/DataTypesDecimal.cpp +++ b/src/DataTypes/DataTypesDecimal.cpp @@ -80,14 +80,14 @@ static DataTypePtr create(const ASTPtr & arguments) const auto * precision_arg = arguments->children[0]->as(); if (!precision_arg || precision_arg->value.getType() != Field::Types::UInt64) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Decimal argument precision is invalid"); - precision = precision_arg->value.get(); + precision = precision_arg->value.safeGet(); if (arguments->children.size() == 2) { const auto * scale_arg = arguments->children[1]->as(); if (!scale_arg || !isInt64OrUInt64FieldType(scale_arg->value.getType())) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Decimal argument scale is invalid"); - scale = scale_arg->value.get(); + scale = scale_arg->value.safeGet(); } } @@ -107,7 +107,7 @@ static DataTypePtr createExact(const ASTPtr & arguments) "Decimal32 | Decimal64 | Decimal128 | Decimal256 data type family must have a one number as its argument"); UInt64 precision = DecimalUtils::max_precision; - UInt64 scale = scale_arg->value.get(); + UInt64 scale = scale_arg->value.safeGet(); return createDecimal(precision, scale); } diff --git a/src/DataTypes/FieldToDataType.cpp b/src/DataTypes/FieldToDataType.cpp index 03874279a0b..536d2656021 100644 --- a/src/DataTypes/FieldToDataType.cpp +++ b/src/DataTypes/FieldToDataType.cpp @@ -178,8 +178,7 @@ DataTypePtr FieldToDataType::operator() (const Map & map) const template DataTypePtr FieldToDataType::operator() (const Object &) const { - /// TODO: Do we need different parameters for type Object? - return std::make_shared("json", false); + return std::make_shared(DataTypeObject::SchemaFormat::JSON); } template diff --git a/src/DataTypes/IDataType.cpp b/src/DataTypes/IDataType.cpp index 824bc6e33b0..945d36dbb92 100644 --- a/src/DataTypes/IDataType.cpp +++ b/src/DataTypes/IDataType.cpp @@ -8,7 +8,6 @@ #include #include -#include #include #include @@ -150,6 +149,8 @@ std::unique_ptr IDataType::getSubcolumnData( ISerialization::EnumerateStreamsSettings settings; settings.position_independent_encoding = false; + /// Don't enumerate dynamic subcolumns, they are handled separately. + settings.enumerate_dynamic_streams = false; data.serialization->enumerateStreams(settings, callback_with_data, data); if (!res && data.type->hasDynamicSubcolumnsData()) @@ -363,9 +364,10 @@ bool isArray(TYPE data_type) { return WhichDataType(data_type).isArray(); } \ bool isTuple(TYPE data_type) { return WhichDataType(data_type).isTuple(); } \ bool isMap(TYPE data_type) {return WhichDataType(data_type).isMap(); } \ bool isInterval(TYPE data_type) {return WhichDataType(data_type).isInterval(); } \ -bool isObject(TYPE data_type) { return WhichDataType(data_type).isObject(); } \ +bool isObjectDeprecated(TYPE data_type) { return WhichDataType(data_type).isObjectDeprecated(); } \ bool isVariant(TYPE data_type) { return WhichDataType(data_type).isVariant(); } \ bool isDynamic(TYPE data_type) { return WhichDataType(data_type).isDynamic(); } \ +bool isObject(TYPE data_type) { return WhichDataType(data_type).isObject(); } \ bool isNothing(TYPE data_type) { return WhichDataType(data_type).isNothing(); } \ \ bool isColumnedAsNumber(TYPE data_type) \ diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index 397ae3d8be9..a7665e610ab 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -432,7 +432,7 @@ struct WhichDataType constexpr bool isMap() const {return idx == TypeIndex::Map; } constexpr bool isSet() const { return idx == TypeIndex::Set; } constexpr bool isInterval() const { return idx == TypeIndex::Interval; } - constexpr bool isObject() const { return idx == TypeIndex::Object; } + constexpr bool isObjectDeprecated() const { return idx == TypeIndex::ObjectDeprecated; } constexpr bool isNothing() const { return idx == TypeIndex::Nothing; } constexpr bool isNullable() const { return idx == TypeIndex::Nullable; } @@ -444,6 +444,7 @@ struct WhichDataType constexpr bool isVariant() const { return idx == TypeIndex::Variant; } constexpr bool isDynamic() const { return idx == TypeIndex::Dynamic; } + constexpr bool isObject() const { return idx == TypeIndex::Object; } }; /// IDataType helpers (alternative for IDataType virtual methods with single point of truth) @@ -502,9 +503,10 @@ bool isArray(TYPE data_type); \ bool isTuple(TYPE data_type); \ bool isMap(TYPE data_type); \ bool isInterval(TYPE data_type); \ -bool isObject(TYPE data_type); \ +bool isObjectDeprecated(TYPE data_type); \ bool isVariant(TYPE data_type); \ bool isDynamic(TYPE data_type); \ +bool isObject(TYPE data_type); \ bool isNothing(TYPE data_type); \ \ bool isColumnedAsNumber(TYPE data_type); \ diff --git a/src/DataTypes/ObjectUtils.cpp b/src/DataTypes/ObjectUtils.cpp index 356e609e77a..fb64199a1b0 100644 --- a/src/DataTypes/ObjectUtils.cpp +++ b/src/DataTypes/ObjectUtils.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include @@ -135,7 +135,7 @@ Array createEmptyArrayField(size_t num_dimensions) for (size_t i = 1; i < num_dimensions; ++i) { current_array->push_back(Array()); - current_array = ¤t_array->back().get(); + current_array = ¤t_array->back().safeGet(); } return array; @@ -180,12 +180,12 @@ static DataTypePtr recreateTupleWithElements(const DataTypeTuple & type_tuple, c } static std::pair convertObjectColumnToTuple( - const ColumnObject & column_object, const DataTypeObject & type_object) + const ColumnObjectDeprecated & column_object, const DataTypeObjectDeprecated & type_object) { if (!column_object.isFinalized()) { auto finalized = column_object.cloneFinalized(); - const auto & finalized_object = assert_cast(*finalized); + const auto & finalized_object = assert_cast(*finalized); return convertObjectColumnToTuple(finalized_object, type_object); } @@ -211,9 +211,9 @@ static std::pair recursivlyConvertDynamicColumnToTuple( if (!type->hasDynamicSubcolumnsDeprecated()) return {column, type}; - if (const auto * type_object = typeid_cast(type.get())) + if (const auto * type_object = typeid_cast(type.get())) { - const auto & column_object = assert_cast(*column); + const auto & column_object = assert_cast(*column); return convertObjectColumnToTuple(column_object, *type_object); } @@ -369,7 +369,7 @@ static DataTypePtr getLeastCommonTypeForObject(const DataTypes & types, bool che for (const auto & [key, subtypes] : subcolumns_types) { assert(!subtypes.empty()); - if (key.getPath() == ColumnObject::COLUMN_NAME_DUMMY) + if (key.getPath() == ColumnObjectDeprecated::COLUMN_NAME_DUMMY) continue; size_t first_dim = getNumberOfDimensions(*subtypes[0]); @@ -385,7 +385,7 @@ static DataTypePtr getLeastCommonTypeForObject(const DataTypes & types, bool che if (tuple_paths.empty()) { - tuple_paths.emplace_back(ColumnObject::COLUMN_NAME_DUMMY); + tuple_paths.emplace_back(ColumnObjectDeprecated::COLUMN_NAME_DUMMY); tuple_types.emplace_back(std::make_shared()); } @@ -452,7 +452,7 @@ static DataTypePtr getLeastCommonTypeForDynamicColumnsImpl( if (!type_in_storage->hasDynamicSubcolumnsDeprecated()) return type_in_storage; - if (isObject(type_in_storage)) + if (isObjectDeprecated(type_in_storage)) return getLeastCommonTypeForObject(concrete_types, check_ambiguos_paths); if (const auto * type_array = typeid_cast(type_in_storage.get())) @@ -494,9 +494,9 @@ DataTypePtr createConcreteEmptyDynamicColumn(const DataTypePtr & type_in_storage if (!type_in_storage->hasDynamicSubcolumnsDeprecated()) return type_in_storage; - if (isObject(type_in_storage)) + if (isObjectDeprecated(type_in_storage)) return std::make_shared( - DataTypes{std::make_shared()}, Names{ColumnObject::COLUMN_NAME_DUMMY}); + DataTypes{std::make_shared()}, Names{ColumnObjectDeprecated::COLUMN_NAME_DUMMY}); if (const auto * type_array = typeid_cast(type_in_storage.get())) return std::make_shared( @@ -838,7 +838,7 @@ DataTypePtr unflattenTuple(const PathsInData & paths, const DataTypes & tuple_ty return unflattenTuple(paths, tuple_types, tuple_columns).second; } -std::pair unflattenObjectToTuple(const ColumnObject & column) +std::pair unflattenObjectToTuple(const ColumnObjectDeprecated & column) { const auto & subcolumns = column.getSubcolumns(); @@ -846,7 +846,7 @@ std::pair unflattenObjectToTuple(const ColumnObject & co { auto type = std::make_shared( DataTypes{std::make_shared()}, - Names{ColumnObject::COLUMN_NAME_DUMMY}); + Names{ColumnObjectDeprecated::COLUMN_NAME_DUMMY}); return {type->createColumn()->cloneResized(column.size()), type}; } diff --git a/src/DataTypes/ObjectUtils.h b/src/DataTypes/ObjectUtils.h index 21e5c3b2f59..d4109b971a4 100644 --- a/src/DataTypes/ObjectUtils.h +++ b/src/DataTypes/ObjectUtils.h @@ -6,7 +6,7 @@ #include #include #include -#include +#include namespace DB { @@ -88,7 +88,7 @@ DataTypePtr unflattenTuple( const PathsInData & paths, const DataTypes & tuple_types); -std::pair unflattenObjectToTuple(const ColumnObject & column); +std::pair unflattenObjectToTuple(const ColumnObjectDeprecated & column); std::pair unflattenTuple( const PathsInData & paths, diff --git a/src/DataTypes/Serializations/ISerialization.cpp b/src/DataTypes/Serializations/ISerialization.cpp index 7642a6619b3..338edc3a144 100644 --- a/src/DataTypes/Serializations/ISerialization.cpp +++ b/src/DataTypes/Serializations/ISerialization.cpp @@ -202,6 +202,12 @@ String getNameForSubstreamPath( stream_name += "." + it->variant_element_name + ".null"; else if (it->type == SubstreamType::DynamicStructure) stream_name += ".dynamic_structure"; + else if (it->type == SubstreamType::ObjectStructure) + stream_name += ".object_structure"; + else if (it->type == SubstreamType::ObjectSharedData) + stream_name += ".object_shared_data"; + else if (it->type == SubstreamType::ObjectTypedPath || it->type == SubstreamType::ObjectDynamicPath) + stream_name += "." + it->object_path_name; } return stream_name; @@ -401,7 +407,17 @@ bool ISerialization::hasSubcolumnForPath(const SubstreamPath & path, size_t pref || path[last_elem].type == Substream::TupleElement || path[last_elem].type == Substream::ArraySizes || path[last_elem].type == Substream::VariantElement - || path[last_elem].type == Substream::VariantElementNullMap; + || path[last_elem].type == Substream::VariantElementNullMap + || path[last_elem].type == Substream::ObjectTypedPath; +} + +bool ISerialization::isEphemeralSubcolumn(const DB::ISerialization::SubstreamPath & path, size_t prefix_len) +{ + if (prefix_len == 0 || prefix_len > path.size()) + return false; + + size_t last_elem = prefix_len - 1; + return path[last_elem].type == Substream::VariantElementNullMap; } ISerialization::SubstreamData ISerialization::createFromPath(const SubstreamPath & path, size_t prefix_len) diff --git a/src/DataTypes/Serializations/ISerialization.h b/src/DataTypes/Serializations/ISerialization.h index 5d0bf60c59f..33575a07177 100644 --- a/src/DataTypes/Serializations/ISerialization.h +++ b/src/DataTypes/Serializations/ISerialization.h @@ -176,8 +176,8 @@ public: SparseElements, SparseOffsets, - ObjectStructure, - ObjectData, + DeprecatedObjectStructure, + DeprecatedObjectData, VariantDiscriminators, NamedVariantDiscriminators, @@ -189,6 +189,12 @@ public: DynamicData, DynamicStructure, + ObjectData, + ObjectTypedPath, + ObjectDynamicPath, + ObjectSharedData, + ObjectStructure, + Regular, }; @@ -203,6 +209,9 @@ public: /// Name of substream for type from 'named_types'. String name_of_substream; + /// Path name for Object type elements. + String object_path_name; + /// Data for current substream. SubstreamData data; @@ -232,6 +241,10 @@ public: { SubstreamPath path; bool position_independent_encoding = true; + /// If set to false, don't enumerate dynamic subcolumns + /// (such as dynamic types in Dynamic column or dynamic paths in JSON column). + /// It may be needed when dynamic subcolumns are processed separately. + bool enumerate_dynamic_streams = true; }; virtual void enumerateStreams( @@ -263,13 +276,13 @@ public: bool use_compact_variant_discriminators_serialization = false; - enum class DynamicStatisticsMode + enum class ObjectAndDynamicStatisticsMode { NONE, /// Don't write statistics. PREFIX, /// Write statistics in prefix. SUFFIX, /// Write statistics in suffix. }; - DynamicStatisticsMode dynamic_write_statistics = DynamicStatisticsMode::NONE; + ObjectAndDynamicStatisticsMode object_and_dynamic_write_statistics = ObjectAndDynamicStatisticsMode::NONE; }; struct DeserializeBinaryBulkSettings @@ -290,7 +303,7 @@ public: /// If not zero, may be used to avoid reallocations while reading column of String type. double avg_value_size_hint = 0; - bool dynamic_read_statistics = false; + bool object_and_dynamic_read_statistics = false; }; /// Call before serializeBinaryBulkWithMultipleStreams chain to write something before first mark. @@ -440,6 +453,10 @@ public: static bool hasSubcolumnForPath(const SubstreamPath & path, size_t prefix_len); static SubstreamData createFromPath(const SubstreamPath & path, size_t prefix_len); + /// Returns true if subcolumn doesn't actually stores any data in column and doesn't require a separate stream + /// for writing/reading data. For example, it's a null-map subcolumn of Variant type (it's always constructed from discriminators);. + static bool isEphemeralSubcolumn(const SubstreamPath & path, size_t prefix_len); + protected: template State * checkAndGetState(const StatePtr & state) const; diff --git a/src/DataTypes/Serializations/JSONDataParser.cpp b/src/DataTypes/Serializations/JSONDataParser.cpp index 56641424396..0f74815f5b4 100644 --- a/src/DataTypes/Serializations/JSONDataParser.cpp +++ b/src/DataTypes/Serializations/JSONDataParser.cpp @@ -131,7 +131,7 @@ void JSONDataParser::traverseArrayElement(const Element & element, P auto nested_hash = getHashOfNestedPath(paths[i], values[i]); if (nested_hash) { - size_t array_size = values[i].template get().size(); + size_t array_size = values[i].template safeGet().size(); auto & current_nested_sizes = ctx.nested_sizes_by_path[*nested_hash]; if (current_nested_sizes.size() == ctx.current_size) @@ -154,7 +154,7 @@ void JSONDataParser::traverseArrayElement(const Element & element, P auto nested_hash = getHashOfNestedPath(paths[i], values[i]); if (nested_hash) { - size_t array_size = values[i].template get().size(); + size_t array_size = values[i].template safeGet().size(); auto & current_nested_sizes = ctx.nested_sizes_by_path[*nested_hash]; if (current_nested_sizes.empty()) diff --git a/src/DataTypes/Serializations/SerializationAggregateFunction.cpp b/src/DataTypes/Serializations/SerializationAggregateFunction.cpp index 55f7641e058..41b198890e4 100644 --- a/src/DataTypes/Serializations/SerializationAggregateFunction.cpp +++ b/src/DataTypes/Serializations/SerializationAggregateFunction.cpp @@ -16,14 +16,14 @@ namespace DB void SerializationAggregateFunction::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const { - const AggregateFunctionStateData & state = field.get(); + const AggregateFunctionStateData & state = field.safeGet(); writeBinary(state.data, ostr); } void SerializationAggregateFunction::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings &) const { field = AggregateFunctionStateData(); - AggregateFunctionStateData & s = field.get(); + AggregateFunctionStateData & s = field.safeGet(); readBinary(s.data, istr); s.name = type_name; } diff --git a/src/DataTypes/Serializations/SerializationArray.cpp b/src/DataTypes/Serializations/SerializationArray.cpp index b7d43332085..0a9c4529e23 100644 --- a/src/DataTypes/Serializations/SerializationArray.cpp +++ b/src/DataTypes/Serializations/SerializationArray.cpp @@ -29,7 +29,7 @@ static constexpr size_t MAX_ARRAYS_SIZE = 1ULL << 40; void SerializationArray::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const { - const Array & a = field.get(); + const Array & a = field.safeGet(); writeVarUInt(a.size(), ostr); for (const auto & i : a) { @@ -51,7 +51,7 @@ void SerializationArray::deserializeBinary(Field & field, ReadBuffer & istr, con settings.binary.max_binary_string_size); field = Array(); - Array & arr = field.get(); + Array & arr = field.safeGet(); arr.reserve(size); for (size_t i = 0; i < size; ++i) nested->deserializeBinary(arr.emplace_back(), istr, settings); diff --git a/src/DataTypes/Serializations/SerializationDecimalBase.cpp b/src/DataTypes/Serializations/SerializationDecimalBase.cpp index 49dc042e872..8927f949368 100644 --- a/src/DataTypes/Serializations/SerializationDecimalBase.cpp +++ b/src/DataTypes/Serializations/SerializationDecimalBase.cpp @@ -13,7 +13,7 @@ namespace DB template void SerializationDecimalBase::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const { - FieldType x = field.get>(); + FieldType x = field.safeGet>(); writeBinaryLittleEndian(x, ostr); } diff --git a/src/DataTypes/Serializations/SerializationDynamic.cpp b/src/DataTypes/Serializations/SerializationDynamic.cpp index 7609ffc91ca..b921a3bc897 100644 --- a/src/DataTypes/Serializations/SerializationDynamic.cpp +++ b/src/DataTypes/Serializations/SerializationDynamic.cpp @@ -27,15 +27,21 @@ namespace ErrorCodes struct SerializeBinaryBulkStateDynamic : public ISerialization::SerializeBinaryBulkState { SerializationDynamic::DynamicStructureSerializationVersion structure_version; + size_t max_dynamic_types; DataTypePtr variant_type; Names variant_names; SerializationPtr variant_serialization; ISerialization::SerializeBinaryBulkStatePtr variant_state; - /// Variants statistics. Map (Variant name) -> (Variant size). - ColumnDynamic::Statistics statistics = { .source = ColumnDynamic::Statistics::Source::READ, .data = {} }; + /// Variants statistics. + ColumnDynamic::Statistics statistics; + /// If true, statistics will be recalculated during serialization. + bool recalculate_statistics = false; - explicit SerializeBinaryBulkStateDynamic(UInt64 structure_version_) : structure_version(structure_version_) {} + explicit SerializeBinaryBulkStateDynamic(UInt64 structure_version_) + : structure_version(structure_version_), statistics(ColumnDynamic::Statistics::Source::READ) + { + } }; struct DeserializeBinaryBulkStateDynamic : public ISerialization::DeserializeBinaryBulkState @@ -58,7 +64,7 @@ void SerializationDynamic::enumerateStreams( const auto * deserialize_state = data.deserialize_state ? checkAndGetState(data.deserialize_state) : nullptr; /// If column is nullptr and we don't have deserialize state yet, nothing to enumerate as we don't have any variants. - if (!column_dynamic && !deserialize_state) + if (!settings.enumerate_dynamic_streams || (!column_dynamic && !deserialize_state)) return; const auto & variant_type = column_dynamic ? column_dynamic->getVariantInfo().variant_type : checkAndGetState(deserialize_state->structure_state)->variant_type; @@ -106,20 +112,41 @@ void SerializationDynamic::serializeBinaryBulkStatePrefix( writeBinaryLittleEndian(structure_version, *stream); auto dynamic_state = std::make_shared(structure_version); + dynamic_state->max_dynamic_types = column_dynamic.getMaxDynamicTypes(); + /// Write max_dynamic_types parameter, because it can differ from the max_dynamic_types + /// that is specified in the Dynamic type (we could decrease it before merge). + writeVarUInt(dynamic_state->max_dynamic_types, *stream); + dynamic_state->variant_type = variant_info.variant_type; dynamic_state->variant_names = variant_info.variant_names; const auto & variant_column = column_dynamic.getVariantColumn(); - /// Write internal Variant type name. + /// Write information about variants. + size_t num_variants = dynamic_state->variant_names.size() - 1; /// Don't write shared variant, Dynamic column should always have it. + writeVarUInt(num_variants, *stream); if (settings.data_types_binary_encoding) - encodeDataType(dynamic_state->variant_type, *stream); + { + const auto & variants = assert_cast(*dynamic_state->variant_type).getVariants(); + for (const auto & variant: variants) + { + if (variant->getName() != ColumnDynamic::getSharedVariantTypeName()) + encodeDataType(variant, *stream); + } + } else - writeStringBinary(dynamic_state->variant_type->getName(), *stream); + { + for (const auto & name : dynamic_state->variant_names) + { + if (name != ColumnDynamic::getSharedVariantTypeName()) + writeStringBinary(name, *stream); + } + } /// Write statistics in prefix if needed. - if (settings.dynamic_write_statistics == SerializeBinaryBulkSettings::DynamicStatisticsMode::PREFIX) + if (settings.object_and_dynamic_write_statistics == SerializeBinaryBulkSettings::ObjectAndDynamicStatisticsMode::PREFIX) { const auto & statistics = column_dynamic.getStatistics(); + /// First, write statistics for usual variants. for (size_t i = 0; i != variant_info.variant_names.size(); ++i) { size_t size = 0; @@ -129,13 +156,55 @@ void SerializationDynamic::serializeBinaryBulkStatePrefix( /// - statistics read from the data part during deserialization of Dynamic column (Statistics::Source::READ). /// We can rely only on statistics calculated during the merge, because column with statistics that was read /// during deserialization from some data part could be filtered/limited/transformed/etc and so the statistics can be outdated. - if (!statistics.data.empty() && statistics.source == ColumnDynamic::Statistics::Source::MERGE) - size = statistics.data.at(variant_info.variant_names[i]); + if (statistics && statistics->source == ColumnDynamic::Statistics::Source::MERGE) + size = statistics->variants_statistics.at(variant_info.variant_names[i]); /// Otherwise we can use only variant sizes from current column. else size = variant_column.getVariantByGlobalDiscriminator(i).size(); writeVarUInt(size, *stream); } + + /// Second, write statistics for variants in shared variant. + /// Check if we have statistics calculated during merge of some data parts (Statistics::Source::MERGE). + if (statistics && statistics->source == ColumnDynamic::Statistics::Source::MERGE) + { + writeVarUInt(statistics->shared_variants_statistics.size(), *stream); + for (const auto & [variant_name, size] : statistics->shared_variants_statistics) + { + writeStringBinary(variant_name, *stream); + writeVarUInt(size, *stream); + } + } + /// If we don't have statistics for shared variants from merge, calculate it from the column. + else + { + std::unordered_map shared_variants_statistics; + const auto & shared_variant = column_dynamic.getSharedVariant(); + for (size_t i = 0; i != shared_variant.size(); ++i) + { + auto value = shared_variant.getDataAt(i); + ReadBufferFromMemory buf(value.data, value.size); + auto type = decodeDataType(buf); + auto type_name = type->getName(); + if (auto it = shared_variants_statistics.find(type_name); it != shared_variants_statistics.end()) + ++it->second; + else if (shared_variants_statistics.size() < ColumnDynamic::Statistics::MAX_SHARED_VARIANT_STATISTICS_SIZE) + shared_variants_statistics.emplace(type_name, 1); + } + + writeVarUInt(shared_variants_statistics.size(), *stream); + for (const auto & [variant_name, size] : shared_variants_statistics) + { + writeStringBinary(variant_name, *stream); + writeVarUInt(size, *stream); + } + } + } + /// Otherwise statistics will be written in the suffix, in this case we will recalculate + /// statistics during serialization to make it more precise. + else + { + dynamic_state->recalculate_statistics = true; } dynamic_state->variant_serialization = dynamic_state->variant_type->getDefaultSerialization(); @@ -156,8 +225,8 @@ void SerializationDynamic::deserializeBinaryBulkStatePrefix( return; auto dynamic_state = std::make_shared(); - dynamic_state->structure_state = structure_state; - dynamic_state->variant_serialization = checkAndGetState(structure_state)->variant_type->getDefaultSerialization(); + dynamic_state->structure_state = std::move(structure_state); + dynamic_state->variant_serialization = checkAndGetState(dynamic_state->structure_state)->variant_type->getDefaultSerialization(); settings.path.push_back(Substream::DynamicData); dynamic_state->variant_serialization->deserializeBinaryBulkStatePrefix(settings, dynamic_state->variant_state, cache); @@ -174,7 +243,7 @@ ISerialization::DeserializeBinaryBulkStatePtr SerializationDynamic::deserializeD DeserializeBinaryBulkStatePtr state = nullptr; if (auto cached_state = getFromSubstreamsDeserializeStatesCache(cache, settings.path)) { - state = cached_state; + state = std::move(cached_state); } else if (auto * structure_stream = settings.getter(settings.path)) { @@ -182,33 +251,53 @@ ISerialization::DeserializeBinaryBulkStatePtr SerializationDynamic::deserializeD UInt64 structure_version; readBinaryLittleEndian(structure_version, *structure_stream); auto structure_state = std::make_shared(structure_version); - /// Read internal Variant type name. + /// Read max_dynamic_types parameter. + readVarUInt(structure_state->max_dynamic_types, *structure_stream); + /// Read information about variants. + DataTypes variants; + size_t num_variants; + readVarUInt(num_variants, *structure_stream); + variants.reserve(num_variants + 1); /// +1 for shared variant. if (settings.data_types_binary_encoding) { - structure_state->variant_type = decodeDataType(*structure_stream); + for (size_t i = 0; i != num_variants; ++i) + variants.push_back(decodeDataType(*structure_stream)); } else { String data_type_name; - readStringBinary(data_type_name, *structure_stream); - structure_state->variant_type = DataTypeFactory::instance().get(data_type_name); - } - const auto * variant_type = typeid_cast(structure_state->variant_type.get()); - if (!variant_type) - throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect type of Dynamic nested column, expected Variant, got {}", structure_state->variant_type->getName()); - - /// Read statistics. - if (settings.dynamic_read_statistics) - { - const auto & variants = variant_type->getVariants(); - size_t variant_size; - for (const auto & variant : variants) + for (size_t i = 0; i != num_variants; ++i) { - readVarUInt(variant_size, *structure_stream); - structure_state->statistics.data[variant->getName()] = variant_size; + readStringBinary(data_type_name, *structure_stream); + variants.push_back(DataTypeFactory::instance().get(data_type_name)); } } + /// Add shared variant, Dynamic column should always have it. + variants.push_back(ColumnDynamic::getSharedVariantDataType()); + auto variant_type = std::make_shared(variants); + /// Read statistics. + if (settings.object_and_dynamic_read_statistics) + { + ColumnDynamic::Statistics statistics(ColumnDynamic::Statistics::Source::READ); + /// First, read statistics for usual variants. + for (const auto & variant : variant_type->getVariants()) + readVarUInt(statistics.variants_statistics[variant->getName()], *structure_stream); + + /// Second, read statistics for shared variants. + size_t statistics_size; + readVarUInt(statistics_size, *structure_stream); + String variant_name; + for (size_t i = 0; i != statistics_size; ++i) + { + readStringBinary(variant_name, *structure_stream); + readVarUInt(statistics.shared_variants_statistics[variant_name], *structure_stream); + } + + structure_state->statistics = std::make_shared(std::move(statistics)); + } + + structure_state->variant_type = std::move(variant_type); state = structure_state; addToSubstreamsDeserializeStatesCache(cache, settings.path, state); } @@ -226,13 +315,21 @@ void SerializationDynamic::serializeBinaryBulkStateSuffix( settings.path.pop_back(); if (!stream) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Missing stream for Dynamic column structure during serialization of binary bulk state prefix"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Missing stream for Dynamic column structure during serialization of binary bulk state suffix"); /// Write statistics in suffix if needed. - if (settings.dynamic_write_statistics == SerializeBinaryBulkSettings::DynamicStatisticsMode::SUFFIX) + if (settings.object_and_dynamic_write_statistics == SerializeBinaryBulkSettings::ObjectAndDynamicStatisticsMode::SUFFIX) { + /// First, write statistics for usual variants. for (const auto & variant_name : dynamic_state->variant_names) - writeVarUInt(dynamic_state->statistics.data[variant_name], *stream); + writeVarUInt(dynamic_state->statistics.variants_statistics[variant_name], *stream); + /// Second, write statistics for shared variants. + writeVarUInt(dynamic_state->statistics.shared_variants_statistics.size(), *stream); + for (const auto & [variant_name, size] : dynamic_state->statistics.shared_variants_statistics) + { + writeStringBinary(variant_name, *stream); + writeVarUInt(size, *stream); + } } settings.path.push_back(Substream::DynamicData); @@ -246,6 +343,18 @@ void SerializationDynamic::serializeBinaryBulkWithMultipleStreams( size_t limit, SerializeBinaryBulkSettings & settings, SerializeBinaryBulkStatePtr & state) const +{ + size_t tmp_size; + serializeBinaryBulkWithMultipleStreamsAndCountTotalSizeOfVariants(column, offset, limit, settings, state, tmp_size); +} + +void SerializationDynamic::serializeBinaryBulkWithMultipleStreamsAndCountTotalSizeOfVariants( + const IColumn & column, + size_t offset, + size_t limit, + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state, + size_t & total_size_of_variants) const { const auto & column_dynamic = assert_cast(column); auto * dynamic_state = checkAndGetState(state); @@ -255,9 +364,46 @@ void SerializationDynamic::serializeBinaryBulkWithMultipleStreams( if (!variant_info.variant_type->equals(*dynamic_state->variant_type)) throw Exception(ErrorCodes::LOGICAL_ERROR, "Mismatch of internal columns of Dynamic. Expected: {}, Got: {}", dynamic_state->variant_type->getName(), variant_info.variant_type->getName()); + if (column_dynamic.getMaxDynamicTypes() != dynamic_state->max_dynamic_types) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Mismatch of max_dynamic_types parameter of Dynamic. Expected: {}, Got: {}", dynamic_state->max_dynamic_types, column_dynamic.getMaxDynamicTypes()); + settings.path.push_back(Substream::DynamicData); assert_cast(*dynamic_state->variant_serialization) - .serializeBinaryBulkWithMultipleStreamsAndUpdateVariantStatistics(*variant_column, offset, limit, settings, dynamic_state->variant_state, dynamic_state->statistics.data); + .serializeBinaryBulkWithMultipleStreamsAndUpdateVariantStatistics( + *variant_column, + offset, + limit, + settings, + dynamic_state->variant_state, + dynamic_state->statistics.variants_statistics, + total_size_of_variants); + + if (dynamic_state->recalculate_statistics) + { + /// Calculate statistics for shared variants. + const auto & shared_variant = column_dynamic.getSharedVariant(); + if (!shared_variant.empty()) + { + const auto & local_discriminators = variant_column->getLocalDiscriminators(); + const auto & offsets = variant_column->getOffsets(); + const auto shared_variant_discr = variant_column->localDiscriminatorByGlobal(column_dynamic.getSharedVariantDiscriminator()); + size_t end = limit == 0 || offset + limit > local_discriminators.size() ? local_discriminators.size() : offset + limit; + for (size_t i = offset; i != end; ++i) + { + if (local_discriminators[i] == shared_variant_discr) + { + auto value = shared_variant.getDataAt(offsets[i]); + ReadBufferFromMemory buf(value.data, value.size); + auto type = decodeDataType(buf); + auto type_name = type->getName(); + if (auto it = dynamic_state->statistics.shared_variants_statistics.find(type_name); it != dynamic_state->statistics.shared_variants_statistics.end()) + ++it->second; + else if (dynamic_state->statistics.shared_variants_statistics.size() < ColumnDynamic::Statistics::MAX_SHARED_VARIANT_STATISTICS_SIZE) + dynamic_state->statistics.shared_variants_statistics.emplace(type_name, 1); + } + } + } + } settings.path.pop_back(); } @@ -272,13 +418,17 @@ void SerializationDynamic::deserializeBinaryBulkWithMultipleStreams( return; auto mutable_column = column->assumeMutable(); + auto & column_dynamic = assert_cast(*mutable_column); auto * dynamic_state = checkAndGetState(state); auto * structure_state = checkAndGetState(dynamic_state->structure_state); if (mutable_column->empty()) - mutable_column = ColumnDynamic::create(structure_state->variant_type->createColumn(), structure_state->variant_type, max_dynamic_types, structure_state->statistics); + { + column_dynamic.setMaxDynamicPaths(structure_state->max_dynamic_types); + column_dynamic.setVariantType(structure_state->variant_type); + column_dynamic.setStatistics(structure_state->statistics); + } - auto & column_dynamic = assert_cast(*mutable_column); const auto & variant_info = column_dynamic.getVariantInfo(); if (!variant_info.variant_type->equals(*structure_state->variant_type)) throw Exception(ErrorCodes::LOGICAL_ERROR, "Mismatch of internal columns of Dynamic. Expected: {}, Got: {}", structure_state->variant_type->getName(), variant_info.variant_type->getName()); @@ -329,24 +479,42 @@ void SerializationDynamic::serializeBinary(const IColumn & column, size_t row_nu encodeDataType(std::make_shared(), ostr); return; } + /// Check if this value is in shared variant. In this case it's already + /// in desired binary format. + else if (global_discr == dynamic_column.getSharedVariantDiscriminator()) + { + auto value = dynamic_column.getSharedVariant().getDataAt(variant_column.offsetAt(row_num)); + ostr.write(value.data, value.size); + return; + } const auto & variant_type = assert_cast(*variant_info.variant_type).getVariant(global_discr); + const auto & variant_type_name = variant_info.variant_names[global_discr]; encodeDataType(variant_type, ostr); - variant_type->getDefaultSerialization()->serializeBinary(variant_column.getVariantByGlobalDiscriminator(global_discr), variant_column.offsetAt(row_num), ostr, settings); + dynamic_column.getVariantSerialization(variant_type, variant_type_name)->serializeBinary(variant_column.getVariantByGlobalDiscriminator(global_discr), variant_column.offsetAt(row_num), ostr, settings); } -template -static void deserializeVariant( +template +static ReturnType deserializeVariant( ColumnVariant & variant_column, - const DataTypePtr & variant_type, + const SerializationPtr & variant_serialization, ColumnVariant::Discriminator global_discr, ReadBuffer & istr, DeserializeFunc deserialize) { auto & variant = variant_column.getVariantByGlobalDiscriminator(global_discr); - deserialize(*variant_type->getDefaultSerialization(), variant, istr); + if constexpr (std::is_same_v) + { + if (!deserialize(*variant_serialization, variant, istr)) + return ReturnType(false); + } + else + { + deserialize(*variant_serialization, variant, istr); + } variant_column.getLocalDiscriminators().push_back(variant_column.localDiscriminatorByGlobal(global_discr)); variant_column.getOffsets().push_back(variant.size() - 1); + return ReturnType(true); } void SerializationDynamic::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const @@ -360,11 +528,12 @@ void SerializationDynamic::deserializeBinary(IColumn & column, ReadBuffer & istr } auto variant_type_name = variant_type->getName(); + const auto & variant_serialization = dynamic_column.getVariantSerialization(variant_type, variant_type_name); const auto & variant_info = dynamic_column.getVariantInfo(); auto it = variant_info.variant_name_to_discriminator.find(variant_type_name); if (it != variant_info.variant_name_to_discriminator.end()) { - deserializeVariant(dynamic_column.getVariantColumn(), variant_type, it->second, istr, [&settings](const ISerialization & serialization, IColumn & variant, ReadBuffer & buf){ serialization.deserializeBinary(variant, buf, settings); }); + deserializeVariant(dynamic_column.getVariantColumn(), variant_serialization, it->second, istr, [&settings](const ISerialization & serialization, IColumn & variant, ReadBuffer & buf){ serialization.deserializeBinary(variant, buf, settings); }); return; } @@ -372,25 +541,15 @@ void SerializationDynamic::deserializeBinary(IColumn & column, ReadBuffer & istr if (dynamic_column.addNewVariant(variant_type)) { auto discr = variant_info.variant_name_to_discriminator.at(variant_type_name); - deserializeVariant(dynamic_column.getVariantColumn(), variant_type, discr, istr, [&settings](const ISerialization & serialization, IColumn & variant, ReadBuffer & buf){ serialization.deserializeBinary(variant, buf, settings); }); + deserializeVariant(dynamic_column.getVariantColumn(), variant_serialization, discr, istr, [&settings](const ISerialization & serialization, IColumn & variant, ReadBuffer & buf){ serialization.deserializeBinary(variant, buf, settings); }); return; } /// We reached maximum number of variants and couldn't add new variant. - /// This case should be really rare in real use cases. - /// We should always be able to add String variant and insert value as String. - dynamic_column.addStringVariant(); + /// In this case we insert this value into shared variant in binary form. auto tmp_variant_column = variant_type->createColumn(); - variant_type->getDefaultSerialization()->deserializeBinary(*tmp_variant_column, istr, settings); - auto string_column = castColumn(ColumnWithTypeAndName(tmp_variant_column->getPtr(), variant_type, ""), std::make_shared()); - auto & variant_column = dynamic_column.getVariantColumn(); - variant_column.insertIntoVariantFrom(variant_info.variant_name_to_discriminator.at("String"), *string_column, 0); -} - -void SerializationDynamic::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const -{ - const auto & dynamic_column = assert_cast(column); - dynamic_column.getVariantInfo().variant_type->getDefaultSerialization()->serializeTextCSV(dynamic_column.getVariantColumn(), row_num, ostr, settings); + variant_serialization->deserializeBinary(*tmp_variant_column, istr, settings); + dynamic_column.insertValueIntoSharedVariant(*tmp_variant_column, variant_type, variant_type_name, 0); } template @@ -406,6 +565,7 @@ static void deserializeTextImpl( auto & dynamic_column = assert_cast(column); auto & variant_column = dynamic_column.getVariantColumn(); const auto & variant_info = dynamic_column.getVariantInfo(); + const auto & variant_types = assert_cast(*variant_info.variant_type).getVariants(); String field = read_field(istr); auto field_buf = std::make_unique(field); JSONInferenceInfo json_info; @@ -413,27 +573,81 @@ static void deserializeTextImpl( if (escaping_rule == FormatSettings::EscapingRule::JSON) transformFinalInferredJSONTypeIfNeeded(variant_type, settings, &json_info); - if (checkIfTypeIsComplete(variant_type) && dynamic_column.addNewVariant(variant_type)) + /// If inferred type is not complete, we cannot add it as a new variant. + /// Let's try to deserialize this field into existing variants. + /// If failed, insert this value as String. + if (!checkIfTypeIsComplete(variant_type)) + { + size_t shared_variant_discr = dynamic_column.getSharedVariantDiscriminator(); + for (size_t i = 0; i != variant_types.size(); ++i) + { + field_buf = std::make_unique(field); + if (i != shared_variant_discr + && deserializeVariant( + variant_column, + dynamic_column.getVariantSerialization(variant_types[i], variant_info.variant_names[i]), + i, + *field_buf, + try_deserialize_variant)) + return; + } + + variant_type = std::make_shared(); + /// To be able to deserialize field as String with Quoted escaping rule, it should be quoted. + if (escaping_rule == FormatSettings::EscapingRule::Quoted && (field.size() < 2 || field.front() != '\'' || field.back() != '\'')) + field = "'" + field + "'"; + } + else if (dynamic_column.addNewVariant(variant_type, variant_type->getName())) { auto discr = variant_info.variant_name_to_discriminator.at(variant_type->getName()); - deserializeVariant(dynamic_column.getVariantColumn(), variant_type, discr, *field_buf, deserialize_variant); + deserializeVariant(dynamic_column.getVariantColumn(), dynamic_column.getVariantSerialization(variant_type), discr, *field_buf, deserialize_variant); return; } - /// We couldn't infer type or add new variant. Try to insert field into current variants. + /// We couldn't infer type or add new variant. Insert it into shared variant. + auto tmp_variant_column = variant_type->createColumn(); field_buf = std::make_unique(field); - if (try_deserialize_variant(*variant_info.variant_type->getDefaultSerialization(), variant_column, *field_buf)) - return; + auto variant_type_name = variant_type->getName(); + deserialize_variant(*dynamic_column.getVariantSerialization(variant_type, variant_type_name), *tmp_variant_column, *field_buf); + dynamic_column.insertValueIntoSharedVariant(*tmp_variant_column, variant_type, variant_type_name, 0); +} - /// We couldn't insert field into any existing variant, add String variant and read value as String. - dynamic_column.addStringVariant(); +template +static void serializeTextImpl( + const IColumn & column, + size_t row_num, + WriteBuffer & ostr, + const FormatSettings & settings, + NestedSerialize nested_serialize) +{ + const auto & dynamic_column = assert_cast(column); + const auto & variant_column = dynamic_column.getVariantColumn(); + /// Check if this row has value in shared variant. In this case we should first deserialize it from binary format. + if (variant_column.globalDiscriminatorAt(row_num) == dynamic_column.getSharedVariantDiscriminator()) + { + auto value = dynamic_column.getSharedVariant().getDataAt(variant_column.offsetAt(row_num)); + ReadBufferFromMemory buf(value.data, value.size); + auto variant_type = decodeDataType(buf); + auto tmp_variant_column = variant_type->createColumn(); + auto variant_serialization = dynamic_column.getVariantSerialization(variant_type); + variant_serialization->deserializeBinary(*tmp_variant_column, buf, settings); + nested_serialize(*variant_serialization, *tmp_variant_column, 0, ostr); + } + /// Otherwise just use serialization for Variant. + else + { + nested_serialize(*dynamic_column.getVariantInfo().variant_type->getDefaultSerialization(), variant_column, row_num, ostr); + } +} - if (escaping_rule == FormatSettings::EscapingRule::Quoted && (field.size() < 2 || field.front() != '\'' || field.back() != '\'')) - field = "'" + field + "'"; +void SerializationDynamic::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + auto nested_serialize = [&settings](const ISerialization & serialization, const IColumn & col, size_t row, WriteBuffer & buf) + { + serialization.serializeTextCSV(col, row, buf, settings); + }; - field_buf = std::make_unique(field); - auto string_discr = variant_info.variant_name_to_discriminator.at("String"); - deserializeVariant(dynamic_column.getVariantColumn(), std::make_shared(), string_discr, *field_buf, deserialize_variant); + serializeTextImpl(column, row_num, ostr, settings, nested_serialize); } void SerializationDynamic::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const @@ -466,8 +680,12 @@ bool SerializationDynamic::tryDeserializeTextCSV(DB::IColumn & column, DB::ReadB void SerializationDynamic::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { - const auto & dynamic_column = assert_cast(column); - dynamic_column.getVariantInfo().variant_type->getDefaultSerialization()->serializeTextEscaped(dynamic_column.getVariantColumn(), row_num, ostr, settings); + auto nested_serialize = [&settings](const ISerialization & serialization, const IColumn & col, size_t row, WriteBuffer & buf) + { + serialization.serializeTextEscaped(col, row, buf, settings); + }; + + serializeTextImpl(column, row_num, ostr, settings, nested_serialize); } void SerializationDynamic::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const @@ -500,8 +718,12 @@ bool SerializationDynamic::tryDeserializeTextEscaped(DB::IColumn & column, DB::R void SerializationDynamic::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { - const auto & dynamic_column = assert_cast(column); - dynamic_column.getVariantInfo().variant_type->getDefaultSerialization()->serializeTextQuoted(dynamic_column.getVariantColumn(), row_num, ostr, settings); + auto nested_serialize = [&settings](const ISerialization & serialization, const IColumn & col, size_t row, WriteBuffer & buf) + { + serialization.serializeTextQuoted(col, row, buf, settings); + }; + + serializeTextImpl(column, row_num, ostr, settings, nested_serialize); } void SerializationDynamic::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const @@ -533,9 +755,19 @@ bool SerializationDynamic::tryDeserializeTextQuoted(DB::IColumn & column, DB::Re } void SerializationDynamic::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + auto nested_serialize = [&settings](const ISerialization & serialization, const IColumn & col, size_t row, WriteBuffer & buf) + { + serialization.serializeTextJSON(col, row, buf, settings); + }; + + serializeTextImpl(column, row_num, ostr, settings, nested_serialize); +} + +void SerializationDynamic::serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent) const { const auto & dynamic_column = assert_cast(column); - dynamic_column.getVariantInfo().variant_type->getDefaultSerialization()->serializeTextJSON(dynamic_column.getVariantColumn(), row_num, ostr, settings); + dynamic_column.getVariantInfo().variant_type->getDefaultSerialization()->serializeTextJSONPretty(dynamic_column.getVariantColumn(), row_num, ostr, settings, indent); } void SerializationDynamic::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const @@ -568,8 +800,12 @@ bool SerializationDynamic::tryDeserializeTextJSON(DB::IColumn & column, DB::Read void SerializationDynamic::serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { - const auto & dynamic_column = assert_cast(column); - dynamic_column.getVariantInfo().variant_type->getDefaultSerialization()->serializeTextRaw(dynamic_column.getVariantColumn(), row_num, ostr, settings); + auto nested_serialize = [&settings](const ISerialization & serialization, const IColumn & col, size_t row, WriteBuffer & buf) + { + serialization.serializeTextRaw(col, row, buf, settings); + }; + + serializeTextImpl(column, row_num, ostr, settings, nested_serialize); } void SerializationDynamic::deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const @@ -602,8 +838,12 @@ bool SerializationDynamic::tryDeserializeTextRaw(DB::IColumn & column, DB::ReadB void SerializationDynamic::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { - const auto & dynamic_column = assert_cast(column); - dynamic_column.getVariantInfo().variant_type->getDefaultSerialization()->serializeText(dynamic_column.getVariantColumn(), row_num, ostr, settings); + auto nested_serialize = [&settings](const ISerialization & serialization, const IColumn & col, size_t row, WriteBuffer & buf) + { + serialization.serializeText(col, row, buf, settings); + }; + + serializeTextImpl(column, row_num, ostr, settings, nested_serialize); } void SerializationDynamic::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const @@ -636,8 +876,12 @@ bool SerializationDynamic::tryDeserializeWholeText(DB::IColumn & column, DB::Rea void SerializationDynamic::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { - const auto & dynamic_column = assert_cast(column); - dynamic_column.getVariantInfo().variant_type->getDefaultSerialization()->serializeTextXML(dynamic_column.getVariantColumn(), row_num, ostr, settings); + auto nested_serialize = [&settings](const ISerialization & serialization, const IColumn & col, size_t row, WriteBuffer & buf) + { + serialization.serializeTextXML(col, row, buf, settings); + }; + + serializeTextImpl(column, row_num, ostr, settings, nested_serialize); } } diff --git a/src/DataTypes/Serializations/SerializationDynamic.h b/src/DataTypes/Serializations/SerializationDynamic.h index 001a3cf87ce..f34b5d0e770 100644 --- a/src/DataTypes/Serializations/SerializationDynamic.h +++ b/src/DataTypes/Serializations/SerializationDynamic.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include namespace DB @@ -11,7 +12,7 @@ class SerializationDynamicElement; class SerializationDynamic : public ISerialization { public: - explicit SerializationDynamic(size_t max_dynamic_types_) : max_dynamic_types(max_dynamic_types_) + explicit SerializationDynamic(size_t max_dynamic_types_ = DataTypeDynamic::DEFAULT_MAX_DYNAMIC_TYPES) : max_dynamic_types(max_dynamic_types_) { } @@ -59,6 +60,14 @@ public: SerializeBinaryBulkSettings & settings, SerializeBinaryBulkStatePtr & state) const override; + void serializeBinaryBulkWithMultipleStreamsAndCountTotalSizeOfVariants( + const IColumn & column, + size_t offset, + size_t limit, + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state, + size_t & total_size_of_variants) const; + void deserializeBinaryBulkWithMultipleStreams( ColumnPtr & column, size_t limit, @@ -89,6 +98,7 @@ public: bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent) const override; void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; @@ -105,9 +115,13 @@ private: { DynamicStructureSerializationVersion structure_version; DataTypePtr variant_type; - ColumnDynamic::Statistics statistics = {.source = ColumnDynamic::Statistics::Source::READ, .data = {}}; + size_t max_dynamic_types; + ColumnDynamic::StatisticsPtr statistics; - explicit DeserializeBinaryBulkStateDynamicStructure(UInt64 structure_version_) : structure_version(structure_version_) {} + explicit DeserializeBinaryBulkStateDynamicStructure(UInt64 structure_version_) + : structure_version(structure_version_) + { + } }; size_t max_dynamic_types; diff --git a/src/DataTypes/Serializations/SerializationDynamicElement.cpp b/src/DataTypes/Serializations/SerializationDynamicElement.cpp index 211f0ac9377..a16186abf2e 100644 --- a/src/DataTypes/Serializations/SerializationDynamicElement.cpp +++ b/src/DataTypes/Serializations/SerializationDynamicElement.cpp @@ -4,7 +4,10 @@ #include #include #include +#include #include +#include +#include #include namespace DB @@ -21,6 +24,8 @@ struct DeserializeBinaryBulkStateDynamicElement : public ISerialization::Deseria ISerialization::DeserializeBinaryBulkStatePtr structure_state; SerializationPtr variant_serialization; ISerialization::DeserializeBinaryBulkStatePtr variant_element_state; + bool read_from_shared_variant; + ColumnPtr shared_variant; }; void SerializationDynamicElement::enumerateStreams( @@ -48,6 +53,7 @@ void SerializationDynamicElement::enumerateStreams( .withColumn(data.column) .withSerializationInfo(data.serialization_info) .withDeserializeState(deserialize_state->variant_element_state); + settings.path.back().data = variant_data; deserialize_state->variant_serialization->enumerateStreams(settings, callback, variant_data); settings.path.pop_back(); } @@ -73,9 +79,10 @@ void SerializationDynamicElement::deserializeBinaryBulkStatePrefix( auto dynamic_element_state = std::make_shared(); dynamic_element_state->structure_state = std::move(structure_state); - const auto & variant_type = checkAndGetState(dynamic_element_state->structure_state)->variant_type; + const auto & variant_type = assert_cast( + *checkAndGetState(dynamic_element_state->structure_state)->variant_type); /// Check if we actually have required element in the Variant. - if (auto global_discr = assert_cast(*variant_type).tryGetVariantDiscriminator(dynamic_element_name)) + if (auto global_discr = variant_type.tryGetVariantDiscriminator(dynamic_element_name)) { settings.path.push_back(Substream::DynamicData); if (is_null_map_subcolumn) @@ -83,6 +90,21 @@ void SerializationDynamicElement::deserializeBinaryBulkStatePrefix( else dynamic_element_state->variant_serialization = std::make_shared(nested_serialization, dynamic_element_name, *global_discr); dynamic_element_state->variant_serialization->deserializeBinaryBulkStatePrefix(settings, dynamic_element_state->variant_element_state, cache); + dynamic_element_state->read_from_shared_variant = false; + settings.path.pop_back(); + } + /// If we don't have this element in the Variant, we will read shared variant and try to find it there. + else + { + auto shared_variant_global_discr = variant_type.tryGetVariantDiscriminator(ColumnDynamic::getSharedVariantTypeName()); + chassert(shared_variant_global_discr.has_value()); + settings.path.push_back(Substream::DynamicData); + dynamic_element_state->variant_serialization = std::make_shared( + ColumnDynamic::getSharedVariantDataType()->getDefaultSerialization(), + ColumnDynamic::getSharedVariantTypeName(), + *shared_variant_global_discr); + dynamic_element_state->variant_serialization->deserializeBinaryBulkStatePrefix(settings, dynamic_element_state->variant_element_state, cache); + dynamic_element_state->read_from_shared_variant = true; settings.path.pop_back(); } @@ -115,23 +137,103 @@ void SerializationDynamicElement::deserializeBinaryBulkWithMultipleStreams( auto * dynamic_element_state = checkAndGetState(state); - if (dynamic_element_state->variant_serialization) + /// Check if this subcolumn should not be read from shared variant. + /// In this case just read data from the corresponding variant. + if (!dynamic_element_state->read_from_shared_variant) { settings.path.push_back(Substream::DynamicData); - dynamic_element_state->variant_serialization->deserializeBinaryBulkWithMultipleStreams(result_column, limit, settings, dynamic_element_state->variant_element_state, cache); + dynamic_element_state->variant_serialization->deserializeBinaryBulkWithMultipleStreams( + result_column, limit, settings, dynamic_element_state->variant_element_state, cache); settings.path.pop_back(); } - else if (is_null_map_subcolumn) - { - auto mutable_column = result_column->assumeMutable(); - auto & data = assert_cast(*mutable_column).getData(); - data.resize_fill(data.size() + limit, 1); - } + /// Otherwise, read the shared variant column and extract requested type from it. else { - auto mutable_column = result_column->assumeMutable(); - mutable_column->insertManyDefaults(limit); - result_column = std::move(mutable_column); + settings.path.push_back(Substream::DynamicData); + /// Initialize shared_variant column if needed. + if (result_column->empty()) + dynamic_element_state->shared_variant = makeNullable(ColumnDynamic::getSharedVariantDataType()->createColumn()); + size_t prev_size = result_column->size(); + dynamic_element_state->variant_serialization->deserializeBinaryBulkWithMultipleStreams( + dynamic_element_state->shared_variant, limit, settings, dynamic_element_state->variant_element_state, cache); + settings.path.pop_back(); + + /// If we need to read a subcolumn from variant column, create an empty variant column, fill it and extract subcolumn. + auto variant_type = DataTypeFactory::instance().get(dynamic_element_name); + auto result_type = makeNullableOrLowCardinalityNullableSafe(variant_type); + MutableColumnPtr variant_column = nested_subcolumn.empty() || is_null_map_subcolumn ? result_column->assumeMutable() : result_type->createColumn(); + variant_column->reserve(variant_column->size() + limit); + MutableColumnPtr non_nullable_variant_column = variant_column->assumeMutable(); + NullMap * null_map = nullptr; + bool is_low_cardinality_nullable = isColumnLowCardinalityNullable(*variant_column); + /// Resulting subolumn can be Nullable, but value is serialized in shared variant as non-Nullable. + /// Extract non-nullable column and remember the null map to fill it during deserialization. + if (isColumnNullable(*variant_column)) + { + auto & nullable_variant_column = assert_cast(*variant_column); + non_nullable_variant_column = nullable_variant_column.getNestedColumnPtr()->assumeMutable(); + null_map = &nullable_variant_column.getNullMapData(); + } + else if (is_null_map_subcolumn) + { + null_map = &assert_cast(*variant_column).getData(); + } + + auto variant_serialization = variant_type->getDefaultSerialization(); + + const auto & nullable_shared_variant = assert_cast(*dynamic_element_state->shared_variant); + const auto & shared_null_map = nullable_shared_variant.getNullMapData(); + const auto & shared_variant = assert_cast(nullable_shared_variant.getNestedColumn()); + const FormatSettings format_settings; + for (size_t i = prev_size; i != shared_variant.size(); ++i) + { + if (!shared_null_map[i]) + { + auto value = shared_variant.getDataAt(i); + ReadBufferFromMemory buf(value.data, value.size); + auto type = decodeDataType(buf); + if (type->getName() == dynamic_element_name) + { + /// When requested type is LowCardinality the subcolumn type name will be LowCardinality(Nullable). + /// Value in shared variant is serialized as LowCardinality and we cannot simply deserialize it + /// inside LowCardinality(Nullable) column (it will try to deserialize null bit). In this case we + /// have to create temporary LowCardinality column, deserialize value into it and insert it into + /// resulting LowCardinality(Nullable) (insertion from LowCardinality column to LowCardinality(Nullable) + /// column is allowed). + if (is_low_cardinality_nullable) + { + auto tmp_column = variant_type->createColumn(); + variant_serialization->deserializeBinary(*tmp_column, buf, format_settings); + non_nullable_variant_column->insertFrom(*tmp_column, 0); + } + else if (is_null_map_subcolumn) + { + null_map->push_back(0); + } + else + { + variant_serialization->deserializeBinary(*non_nullable_variant_column, buf, format_settings); + if (null_map) + null_map->push_back(0); + } + } + else + { + variant_column->insertDefault(); + } + } + else + { + variant_column->insertDefault(); + } + } + + /// Extract nested subcolumn if needed. + if (!nested_subcolumn.empty() && !is_null_map_subcolumn) + { + auto subcolumn = result_type->getSubcolumn(nested_subcolumn, variant_column->getPtr()); + result_column->assumeMutable()->insertRangeFrom(*subcolumn, 0, subcolumn->size()); + } } } diff --git a/src/DataTypes/Serializations/SerializationDynamicElement.h b/src/DataTypes/Serializations/SerializationDynamicElement.h index 127d14a55e0..c674cf479ae 100644 --- a/src/DataTypes/Serializations/SerializationDynamicElement.h +++ b/src/DataTypes/Serializations/SerializationDynamicElement.h @@ -13,11 +13,15 @@ private: /// To be able to deserialize Dynamic element as a subcolumn /// we need its type name and global discriminator. String dynamic_element_name; + /// Nested subcolumn of a type dynamic type. For example, for `Tuple(a UInt32)`.a + /// subcolumn dynamic_element_name = 'Tuple(a UInt32)' and nested_subcolumn = 'a'. + /// Needed to extract nested subcolumn from values in shared variant. + String nested_subcolumn; bool is_null_map_subcolumn; public: - SerializationDynamicElement(const SerializationPtr & nested_, const String & dynamic_element_name_, bool is_null_map_subcolumn_ = false) - : SerializationWrapper(nested_), dynamic_element_name(dynamic_element_name_), is_null_map_subcolumn(is_null_map_subcolumn_) + SerializationDynamicElement(const SerializationPtr & nested_, const String & dynamic_element_name_, const String & nested_subcolumn_, bool is_null_map_subcolumn_ = false) + : SerializationWrapper(nested_), dynamic_element_name(dynamic_element_name_), nested_subcolumn(nested_subcolumn_), is_null_map_subcolumn(is_null_map_subcolumn_) { } diff --git a/src/DataTypes/Serializations/SerializationFixedString.cpp b/src/DataTypes/Serializations/SerializationFixedString.cpp index f919dc16d33..688c71792fa 100644 --- a/src/DataTypes/Serializations/SerializationFixedString.cpp +++ b/src/DataTypes/Serializations/SerializationFixedString.cpp @@ -28,7 +28,7 @@ static constexpr size_t MAX_STRINGS_SIZE = 1ULL << 30; void SerializationFixedString::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const { - const String & s = field.get(); + const String & s = field.safeGet(); ostr.write(s.data(), std::min(s.size(), n)); if (s.size() < n) for (size_t i = s.size(); i < n; ++i) @@ -39,7 +39,7 @@ void SerializationFixedString::serializeBinary(const Field & field, WriteBuffer void SerializationFixedString::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings &) const { field = String(); - String & s = field.get(); + String & s = field.safeGet(); s.resize(n); istr.readStrict(s.data(), n); } diff --git a/src/DataTypes/Serializations/SerializationIPv4andIPv6.cpp b/src/DataTypes/Serializations/SerializationIPv4andIPv6.cpp index dfcd24aff58..c1beceb4533 100644 --- a/src/DataTypes/Serializations/SerializationIPv4andIPv6.cpp +++ b/src/DataTypes/Serializations/SerializationIPv4andIPv6.cpp @@ -125,7 +125,7 @@ bool SerializationIP::tryDeserializeTextCSV(DB::IColumn & column, DB::ReadB template void SerializationIP::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const { - IPv x = field.get(); + IPv x = field.safeGet(); if constexpr (std::is_same_v) writeBinary(x, ostr); else diff --git a/src/DataTypes/Serializations/SerializationJSON.cpp b/src/DataTypes/Serializations/SerializationJSON.cpp new file mode 100644 index 00000000000..092ccd1c5a5 --- /dev/null +++ b/src/DataTypes/Serializations/SerializationJSON.cpp @@ -0,0 +1,405 @@ +#include +#include +#include + +#if USE_SIMDJSON +#include +#endif +#if USE_RAPIDJSON +#include +#endif +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int INCORRECT_DATA; +} + +template +SerializationJSON::SerializationJSON( + std::unordered_map typed_paths_serializations_, + const std::unordered_set & paths_to_skip_, + const std::vector & path_regexps_to_skip_, + std::unique_ptr> json_extract_tree_) + : SerializationObject(std::move(typed_paths_serializations_), paths_to_skip_, path_regexps_to_skip_) + , json_extract_tree(std::move(json_extract_tree_)) +{ +} + +namespace +{ + +/// Struct that represents elements of the JSON path. +/// "a.b.c" -> ["a", "b", "c"] +struct PathElements +{ + explicit PathElements(const String & path) + { + const char * start = path.data(); + const char * end = start + path.size(); + const char * pos = start; + const char * last_dot_pos = pos - 1; + for (pos = start; pos != end; ++pos) + { + if (*pos == '.') + { + elements.emplace_back(last_dot_pos + 1, size_t(pos - last_dot_pos - 1)); + last_dot_pos = pos; + } + } + + elements.emplace_back(last_dot_pos + 1, size_t(pos - last_dot_pos - 1)); + } + + size_t size() const { return elements.size(); } + + std::vector elements; +}; + +/// Struct that represents a prefix of a JSON path. Used during output of the JSON object. +struct Prefix +{ + /// Shrink current prefix to the common prefix of current prefix and specified path. + /// For example, if current prefix is a.b.c.d and path is a.b.e, then shrink the prefix to a.b. + void shrinkToCommonPrefix(const PathElements & path_elements) + { + /// Don't include last element in path_elements in the prefix. + size_t i = 0; + while (i != elements.size() && i != (path_elements.elements.size() - 1) && elements[i].first == path_elements.elements[i]) + ++i; + elements.resize(i); + } + + /// Check is_first flag in current object. + bool isFirstInCurrentObject() const + { + if (elements.empty()) + return root_is_first_flag; + return elements.back().second; + } + + /// Set flag is_first = false in current object. + void setNotFirstInCurrentObject() + { + if (elements.empty()) + root_is_first_flag = false; + else + elements.back().second = false; + } + + size_t size() const { return elements.size(); } + + /// Elements of the prefix: (path element, is_first flag in this prefix). + /// is_first flag indicates if we already serialized some key in the object with such prefix. + std::vector> elements; + bool root_is_first_flag = true; +}; + +} + +template +void SerializationJSON::serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, bool pretty, size_t indent) const +{ + const auto & column_object = assert_cast(column); + const auto & typed_paths = column_object.getTypedPaths(); + const auto & dynamic_paths = column_object.getDynamicPaths(); + const auto & shared_data_offsets = column_object.getSharedDataOffsets(); + const auto [shared_data_paths, shared_data_values] = column_object.getSharedDataPathsAndValues(); + size_t shared_data_offset = shared_data_offsets[static_cast(row_num) - 1]; + size_t shared_data_end = shared_data_offsets[static_cast(row_num)]; + + /// We need to convert the set of paths in this row to a JSON object. + /// To do it, we first collect all the paths from current row, then we sort them + /// and construct the resulting JSON object by iterating over sorted list of paths. + /// For example: + /// b.c, a.b, a.a, b.e, g, h.u.t -> a.a, a.b, b.c, b.e, g, h.u.t -> {"a" : {"a" : ..., "b" : ...}, "b" : {"c" : ..., "e" : ...}, "g" : ..., "h" : {"u" : {"t" : ...}}}. + std::vector sorted_paths; + sorted_paths.reserve(typed_paths.size() + dynamic_paths.size() + (shared_data_end - shared_data_offset)); + for (const auto & [path, _] : typed_paths) + sorted_paths.emplace_back(path); + for (const auto & [path, dynamic_column] : dynamic_paths) + { + /// We consider null value and absence of the path in a row as equivalent cases, because we cannot actually distinguish them. + /// So, we don't output null values at all. + if (!dynamic_column->isNullAt(row_num)) + sorted_paths.emplace_back(path); + } + for (size_t i = shared_data_offset; i != shared_data_end; ++i) + { + auto path = shared_data_paths->getDataAt(i).toString(); + sorted_paths.emplace_back(path); + } + + std::sort(sorted_paths.begin(), sorted_paths.end()); + + if (pretty) + writeCString("{\n", ostr); + else + writeChar('{', ostr); + size_t index_in_shared_data_values = shared_data_offset; + /// current_prefix represents the path of the object we are currently serializing keys in. + Prefix current_prefix; + for (const auto & path : sorted_paths) + { + PathElements path_elements(path); + /// Change prefix to common prefix between current prefix and current path. + /// If prefix changed (it can only decrease), close all finished objects. + /// For example: + /// Current prefix: a.b.c.d + /// Current path: a.b.e.f + /// It means now we have : {..., "a" : {"b" : {"c" : {"d" : ... + /// Common prefix will be a.b, so it means we should close objects a.b.c.d and a.b.c: {..., "a" : {"b" : {"c" : {"d" : ...}} + /// and continue serializing keys in object a.b + size_t prev_prefix_size = current_prefix.size(); + current_prefix.shrinkToCommonPrefix(path_elements); + size_t prefix_size = current_prefix.size(); + if (prefix_size != prev_prefix_size) + { + size_t objects_to_close = prev_prefix_size - prefix_size; + if (pretty) + { + writeChar('\n', ostr); + for (size_t i = 0; i != objects_to_close; ++i) + { + writeChar(' ', (indent + prefix_size + objects_to_close - i) * 4, ostr); + if (i != objects_to_close - 1) + writeCString("}\n", ostr); + else + writeChar('}', ostr); + } + } + else + { + for (size_t i = 0; i != objects_to_close; ++i) + writeChar('}', ostr); + } + } + + /// Now we are inside object that has common prefix with current path. + /// We should go inside all objects in current path. + /// From the example above we should open object a.b.e: + /// {..., "a" : {"b" : {"c" : {"d" : ...}}, "e" : { + if (prefix_size + 1 < path_elements.size()) + { + for (size_t i = prefix_size; i != path_elements.size() - 1; ++i) + { + /// Write comma before the key if it's not the first key in this prefix. + if (!current_prefix.isFirstInCurrentObject()) + { + if (pretty) + writeCString(",\n", ostr); + else + writeChar(',', ostr); + } + else + { + current_prefix.setNotFirstInCurrentObject(); + } + + if (pretty) + { + writeChar(' ', (indent + i + 1) * 4, ostr); + writeJSONString(path_elements.elements[i], ostr, settings); + writeCString(" : {\n", ostr); + } + else + { + writeJSONString(path_elements.elements[i], ostr, settings); + writeCString(":{", ostr); + } + + /// Update current prefix. + current_prefix.elements.emplace_back(path_elements.elements[i], true); + } + } + + /// Write comma before the key if it's not the first key in this prefix. + if (!current_prefix.isFirstInCurrentObject()) + { + if (pretty) + writeCString(",\n", ostr); + else + writeChar(',', ostr); + } + else + { + current_prefix.setNotFirstInCurrentObject(); + } + + if (pretty) + { + writeChar(' ', (indent + current_prefix.size() + 1) * 4, ostr); + writeJSONString(path_elements.elements.back(), ostr, settings); + writeCString(" : ", ostr); + } + else + { + writeJSONString(path_elements.elements.back(), ostr, settings); + writeCString(":", ostr); + } + + /// Serialize value of current path. + if (auto typed_it = typed_paths.find(path); typed_it != typed_paths.end()) + { + if (pretty) + typed_path_serializations.at(path)->serializeTextJSONPretty(*typed_it->second, row_num, ostr, settings, indent + current_prefix.size() + 1); + else + typed_path_serializations.at(path)->serializeTextJSON(*typed_it->second, row_num, ostr, settings); + } + else if (auto dynamic_it = dynamic_paths.find(path); dynamic_it != dynamic_paths.end()) + { + if (pretty) + dynamic_serialization->serializeTextJSONPretty(*dynamic_it->second, row_num, ostr, settings, indent + current_prefix.size() + 1); + else + dynamic_serialization->serializeTextJSON(*dynamic_it->second, row_num, ostr, settings); + } + else + { + /// To serialize value stored in shared data we should first deserialize it from binary format. + auto tmp_dynamic_column = ColumnDynamic::create(); + tmp_dynamic_column->reserve(1); + column_object.deserializeValueFromSharedData(shared_data_values, index_in_shared_data_values++, *tmp_dynamic_column); + + if (pretty) + dynamic_serialization->serializeTextJSONPretty(*tmp_dynamic_column, 0, ostr, settings, indent + current_prefix.size() + 1); + else + dynamic_serialization->serializeTextJSON(*tmp_dynamic_column, 0, ostr, settings); + } + } + + /// Close all remaining open objects. + if (pretty) + { + writeChar('\n', ostr); + for (size_t i = 0; i != current_prefix.elements.size(); ++i) + { + writeChar(' ', (indent + current_prefix.size() - i) * 4, ostr); + writeCString("}\n", ostr); + } + writeChar(' ', indent * 4, ostr); + writeChar('}', ostr); + } + else + { + for (size_t i = 0; i != current_prefix.elements.size(); ++i) + writeChar('}', ostr); + writeChar('}', ostr); + } +} + +template +void SerializationJSON::deserializeTextImpl(IColumn & column, std::string_view object, const FormatSettings & settings) const +{ + typename Parser::Element document; + auto parser = parsers_pool.get([] { return new Parser; }); + if (!parser->parse(object, document)) + throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse JSON object here: {}", object); + + String error; + if (!json_extract_tree->insertResultToColumn(column, document, insert_settings, settings, error)) + throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot insert data into JSON column: {}", error); +} + +template +void SerializationJSON::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + serializeTextImpl(column, row_num, ostr, settings); +} + +template +void SerializationJSON::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + String object; + readStringUntilEOF(object, istr); + deserializeTextImpl(column, object, settings); +} + +template +void SerializationJSON::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + WriteBufferFromOwnString buf; + serializeTextImpl(column, row_num, buf, settings); + writeEscapedString(buf.str(), ostr); +} + +template +void SerializationJSON::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + String object; + readEscapedString(object, istr); + deserializeTextImpl(column, object, settings); +} + +template +void SerializationJSON::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + WriteBufferFromOwnString buf; + serializeTextImpl(column, row_num, buf, settings); + writeQuotedString(buf.str(), ostr); +} + +template +void SerializationJSON::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + String object; + readQuotedString(object, istr); + deserializeTextImpl(column, object, settings); +} + +template +void SerializationJSON::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + WriteBufferFromOwnString buf; + serializeTextImpl(column, row_num, buf, settings); + writeCSVString(buf.str(), ostr); +} + +template +void SerializationJSON::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + String object; + readCSVString(object, istr, settings.csv); + deserializeTextImpl(column, object, settings); +} + +template +void SerializationJSON::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + WriteBufferFromOwnString buf; + serializeTextImpl(column, row_num, buf, settings); + writeXMLStringForTextElement(buf.str(), ostr); +} + +template +void SerializationJSON::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + serializeTextImpl(column, row_num, ostr, settings); +} + +template +void SerializationJSON::serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent) const +{ + serializeTextImpl(column, row_num, ostr, settings, true, indent); +} + +template +void SerializationJSON::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + String object_buffer; + auto object_view = readJSONObjectAsViewPossiblyInvalid(istr, object_buffer); + deserializeTextImpl(column, object_view, settings); +} + +#if USE_SIMDJSON +template class SerializationJSON; +#endif +#if USE_RAPIDJSON +template class SerializationJSON; +#else +template class SerializationJSON; +#endif + +} diff --git a/src/DataTypes/Serializations/SerializationJSON.h b/src/DataTypes/Serializations/SerializationJSON.h new file mode 100644 index 00000000000..934c94527f3 --- /dev/null +++ b/src/DataTypes/Serializations/SerializationJSON.h @@ -0,0 +1,49 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +/// Class for text serialization/deserialization of the JSON data type. +template +class SerializationJSON : public SerializationObject +{ +public: + SerializationJSON( + std::unordered_map typed_paths_serializations_, + const std::unordered_set & paths_to_skip_, + const std::vector & path_regexps_to_skip_, + std::unique_ptr> json_extract_tree_); + + void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + + void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + + void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + + void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + + void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent) const override; + void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + + void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + +private: + void serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, bool pretty = false, size_t indent = 0) const; + void deserializeTextImpl(IColumn & column, std::string_view object, const FormatSettings & settings) const; + + std::unique_ptr> json_extract_tree; + JSONExtractInsertSettings insert_settings; + /// Pool of parser objects to make SerializationJSON thread safe. + mutable SimpleObjectPool parsers_pool; +}; + +} diff --git a/src/DataTypes/Serializations/SerializationLowCardinality.cpp b/src/DataTypes/Serializations/SerializationLowCardinality.cpp index 40071c4607a..3195a04d348 100644 --- a/src/DataTypes/Serializations/SerializationLowCardinality.cpp +++ b/src/DataTypes/Serializations/SerializationLowCardinality.cpp @@ -268,9 +268,16 @@ void SerializationLowCardinality::serializeBinaryBulkStateSuffix( void SerializationLowCardinality::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state, - SubstreamsDeserializeStatesCache * /*cache*/) const + SubstreamsDeserializeStatesCache * cache) const { settings.path.push_back(Substream::DictionaryKeys); + + if (auto cached_state = getFromSubstreamsDeserializeStatesCache(cache, settings.path)) + { + state = std::move(cached_state); + return; + } + auto * stream = settings.getter(settings.path); settings.path.pop_back(); diff --git a/src/DataTypes/Serializations/SerializationMap.cpp b/src/DataTypes/Serializations/SerializationMap.cpp index 0bef3c7d79d..c722b3ac7a1 100644 --- a/src/DataTypes/Serializations/SerializationMap.cpp +++ b/src/DataTypes/Serializations/SerializationMap.cpp @@ -40,7 +40,7 @@ static IColumn & extractNestedColumn(IColumn & column) void SerializationMap::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const { - const auto & map = field.get(); + const auto & map = field.safeGet(); writeVarUInt(map.size(), ostr); for (const auto & elem : map) { @@ -63,7 +63,7 @@ void SerializationMap::deserializeBinary(Field & field, ReadBuffer & istr, const size, settings.binary.max_binary_string_size); field = Map(); - Map & map = field.get(); + Map & map = field.safeGet(); map.reserve(size); for (size_t i = 0; i < size; ++i) { diff --git a/src/DataTypes/Serializations/SerializationNumber.cpp b/src/DataTypes/Serializations/SerializationNumber.cpp index bdb4dfc6735..bfc13af8ca3 100644 --- a/src/DataTypes/Serializations/SerializationNumber.cpp +++ b/src/DataTypes/Serializations/SerializationNumber.cpp @@ -169,7 +169,7 @@ template void SerializationNumber::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const { /// ColumnVector::ValueType is a narrower type. For example, UInt8, when the Field type is UInt64 - typename ColumnVector::ValueType x = static_cast::ValueType>(field.get()); + typename ColumnVector::ValueType x = static_cast::ValueType>(field.safeGet()); writeBinaryLittleEndian(x, ostr); } diff --git a/src/DataTypes/Serializations/SerializationObject.cpp b/src/DataTypes/Serializations/SerializationObject.cpp index c6c87b5aa7b..760f6ce750d 100644 --- a/src/DataTypes/Serializations/SerializationObject.cpp +++ b/src/DataTypes/Serializations/SerializationObject.cpp @@ -1,586 +1,793 @@ #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include namespace DB { namespace ErrorCodes { - extern const int NOT_IMPLEMENTED; extern const int INCORRECT_DATA; - extern const int CANNOT_READ_ALL_DATA; - extern const int ARGUMENT_OUT_OF_BOUND; - extern const int CANNOT_PARSE_TEXT; - extern const int EXPERIMENTAL_FEATURE_ERROR; + extern const int LOGICAL_ERROR; } -template -template -void SerializationObject::deserializeTextImpl(IColumn & column, Reader && reader) const +SerializationObject::SerializationObject( + std::unordered_map typed_path_serializations_, + const std::unordered_set & paths_to_skip_, + const std::vector & path_regexps_to_skip_) + : typed_path_serializations(std::move(typed_path_serializations_)) + , paths_to_skip(paths_to_skip_) + , dynamic_serialization(std::make_shared()) + , shared_data_serialization(getTypeOfSharedData()->getDefaultSerialization()) { - auto & column_object = assert_cast(column); + /// We will need sorted order of typed paths to serialize them in order for consistency. + sorted_typed_paths.reserve(typed_path_serializations.size()); + for (const auto & [path, _] : typed_path_serializations) + sorted_typed_paths.emplace_back(path); + std::sort(sorted_typed_paths.begin(), sorted_typed_paths.end()); + sorted_paths_to_skip.assign(paths_to_skip.begin(), paths_to_skip.end()); + std::sort(sorted_paths_to_skip.begin(), sorted_paths_to_skip.end()); + for (const auto & regexp_str : path_regexps_to_skip_) + path_regexps_to_skip.emplace_back(regexp_str); +} - String buf; - reader(buf); - std::optional result; +const DataTypePtr & SerializationObject::getTypeOfSharedData() +{ + /// Array(Tuple(String, String)) + static const DataTypePtr type = std::make_shared(std::make_shared(DataTypes{std::make_shared(), std::make_shared()}, Names{"paths", "values"})); + return type; +} - /// Treat empty string as an empty object - /// for better CAST from String to Object. - if (!buf.empty()) +bool SerializationObject::shouldSkipPath(const String & path) const +{ + if (paths_to_skip.contains(path)) + return true; + + auto it = std::lower_bound(sorted_paths_to_skip.begin(), sorted_paths_to_skip.end(), path); + if (it != sorted_paths_to_skip.end() && it != sorted_paths_to_skip.begin() && path.starts_with(*std::prev(it))) + return true; + + for (const auto & regexp : path_regexps_to_skip) { - auto parser = parsers_pool.get([] { return new Parser; }); - result = parser->parse(buf.data(), buf.size()); - } - else - { - result = ParseResult{}; + if (re2::RE2::FullMatch(path, regexp)) + return true; } - if (!result) - throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse object"); + return false; +} - auto & [paths, values] = *result; - assert(paths.size() == values.size()); +SerializationObject::ObjectSerializationVersion::ObjectSerializationVersion(UInt64 version) : value(static_cast(version)) +{ + checkVersion(version); +} - size_t old_column_size = column_object.size(); - for (size_t i = 0; i < paths.size(); ++i) +void SerializationObject::ObjectSerializationVersion::checkVersion(UInt64 version) +{ + if (version != BASIC) + throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid version for Object structure serialization."); +} + +struct SerializeBinaryBulkStateObject: public ISerialization::SerializeBinaryBulkState +{ + SerializationObject::ObjectSerializationVersion serialization_version; + size_t max_dynamic_paths; + std::vector sorted_dynamic_paths; + std::unordered_map typed_path_states; + std::unordered_map dynamic_path_states; + ISerialization::SerializeBinaryBulkStatePtr shared_data_state; + /// Paths statistics. + ColumnObject::Statistics statistics; + /// If true, statistics will be recalculated during serialization. + bool recalculate_statistics = false; + + explicit SerializeBinaryBulkStateObject(UInt64 serialization_version_) + : serialization_version(serialization_version_), statistics(ColumnObject::Statistics::Source::READ) { - auto field_info = getFieldInfo(values[i]); - if (field_info.need_fold_dimension) - values[i] = applyVisitor(FieldVisitorFoldDimension(field_info.num_dimensions), std::move(values[i])); - if (isNothing(field_info.scalar_type)) - continue; + } +}; - if (!column_object.hasSubcolumn(paths[i])) +struct DeserializeBinaryBulkStateObject : public ISerialization::DeserializeBinaryBulkState +{ + std::unordered_map typed_path_states; + std::unordered_map dynamic_path_states; + ISerialization::DeserializeBinaryBulkStatePtr shared_data_state; + ISerialization::DeserializeBinaryBulkStatePtr structure_state; +}; + +void SerializationObject::enumerateStreams(EnumerateStreamsSettings & settings, const StreamCallback & callback, const SubstreamData & data) const +{ + settings.path.push_back(Substream::ObjectStructure); + callback(settings.path); + settings.path.pop_back(); + + const auto * column_object = data.column ? &assert_cast(*data.column) : nullptr; + const auto * type_object = data.type ? &assert_cast(*data.type) : nullptr; + const auto * deserialize_state = data.deserialize_state ? checkAndGetState(data.deserialize_state) : nullptr; + const auto * structure_state = deserialize_state ? checkAndGetState(deserialize_state->structure_state) : nullptr; + settings.path.push_back(Substream::ObjectData); + + /// First, iterate over typed paths in sorted order, we will always serialize them. + for (const auto & path : sorted_typed_paths) + { + settings.path.back().creator = std::make_shared(path); + settings.path.push_back(Substream::ObjectTypedPath); + settings.path.back().object_path_name = path; + const auto & serialization = typed_path_serializations.at(path); + auto path_data = SubstreamData(serialization) + .withType(type_object ? type_object->getTypedPaths().at(path) : nullptr) + .withColumn(column_object ? column_object->getTypedPaths().at(path) : nullptr) + .withSerializationInfo(data.serialization_info) + .withDeserializeState(deserialize_state ? deserialize_state->typed_path_states.at(path) : nullptr); + settings.path.back().data = path_data; + serialization->enumerateStreams(settings, callback, path_data); + settings.path.pop_back(); + settings.path.back().creator.reset(); + } + + /// If column or deserialization state was provided, iterate over dynamic paths, + if (settings.enumerate_dynamic_streams && (column_object || structure_state)) + { + /// Enumerate dynamic paths in sorted order for consistency. + const auto * dynamic_paths = column_object ? &column_object->getDynamicPaths() : nullptr; + std::vector sorted_dynamic_paths; + /// If we have deserialize_state we can take sorted dynamic paths list from it. + if (structure_state) { - if (paths[i].hasNested()) - column_object.addNestedSubcolumn(paths[i], field_info, old_column_size); - else - column_object.addSubcolumn(paths[i], old_column_size); + sorted_dynamic_paths = structure_state->sorted_dynamic_paths; + } + else + { + sorted_dynamic_paths.reserve(dynamic_paths->size()); + for (const auto & [path, _] : *dynamic_paths) + sorted_dynamic_paths.push_back(path); + std::sort(sorted_dynamic_paths.begin(), sorted_dynamic_paths.end()); } - auto & subcolumn = column_object.getSubcolumn(paths[i]); - assert(subcolumn.size() == old_column_size); - - subcolumn.insert(std::move(values[i]), std::move(field_info)); - } - - /// Insert default values to missed subcolumns. - const auto & subcolumns = column_object.getSubcolumns(); - for (const auto & entry : subcolumns) - { - if (entry->data.size() == old_column_size) + DataTypePtr dynamic_type = std::make_shared(); + for (const auto & path : sorted_dynamic_paths) { - bool inserted = column_object.tryInsertDefaultFromNested(entry); - if (!inserted) - entry->data.insertDefault(); + settings.path.push_back(Substream::ObjectDynamicPath); + settings.path.back().object_path_name = path; + auto path_data = SubstreamData(dynamic_serialization) + .withType(dynamic_type) + .withColumn(dynamic_paths ? dynamic_paths->at(path) : nullptr) + .withSerializationInfo(data.serialization_info) + .withDeserializeState(deserialize_state ? deserialize_state->dynamic_path_states.at(path) : nullptr); + settings.path.back().data = path_data; + dynamic_serialization->enumerateStreams(settings, callback, path_data); + settings.path.pop_back(); } } - column_object.incrementNumRows(); + settings.path.push_back(Substream::ObjectSharedData); + auto shared_data_substream_data = SubstreamData(shared_data_serialization) + .withType(getTypeOfSharedData()) + .withColumn(column_object ? column_object->getSharedDataPtr() : nullptr) + .withSerializationInfo(data.serialization_info) + .withDeserializeState(deserialize_state ? deserialize_state->shared_data_state : nullptr); + shared_data_serialization->enumerateStreams(settings, callback, shared_data_substream_data); + settings.path.pop_back(); + settings.path.pop_back(); } -template -void SerializationObject::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const -{ - deserializeTextImpl(column, [&](String & s) { readStringInto(s, istr); }); -} - -template -void SerializationObject::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const -{ - deserializeTextImpl(column, [&](String & s) { settings.tsv.crlf_end_of_line_input ? readEscapedStringCRLF(s, istr) : readEscapedString(s, istr); }); -} - -template -void SerializationObject::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const -{ - deserializeTextImpl(column, [&](String & s) { readQuotedStringInto(s, istr); }); -} - -template -void SerializationObject::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const -{ - deserializeTextImpl(column, [&](String & s) { Parser::readJSON(s, istr); }); -} - -template -void SerializationObject::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const -{ - deserializeTextImpl(column, [&](String & s) { readCSVStringInto(s, istr, settings.csv); }); -} - -template -template -void SerializationObject::checkSerializationIsSupported(const TSettings & settings) const -{ - if (settings.position_independent_encoding) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, - "DataTypeObject doesn't support serialization with position independent encoding"); -} - -template -struct SerializationObject::SerializeStateObject : public ISerialization::SerializeBinaryBulkState -{ - DataTypePtr nested_type; - SerializationPtr nested_serialization; - SerializeBinaryBulkStatePtr nested_state; -}; - -template -struct SerializationObject::DeserializeStateObject : public ISerialization::DeserializeBinaryBulkState -{ - BinarySerializationKind kind; - DataTypePtr nested_type; - SerializationPtr nested_serialization; - DeserializeBinaryBulkStatePtr nested_state; -}; - -template -void SerializationObject::serializeBinaryBulkStatePrefix( +void SerializationObject::serializeBinaryBulkStatePrefix( const IColumn & column, SerializeBinaryBulkSettings & settings, SerializeBinaryBulkStatePtr & state) const { - checkSerializationIsSupported(settings); - if (state) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, - "DataTypeObject doesn't support serialization with non-trivial state"); - const auto & column_object = assert_cast(column); - if (!column_object.isFinalized()) - { - auto finalized = column_object.cloneFinalized(); - serializeBinaryBulkStatePrefix(*finalized, settings, state); - return; - } + const auto & typed_paths = column_object.getTypedPaths(); + const auto & dynamic_paths = column_object.getDynamicPaths(); + const auto & shared_data = column_object.getSharedDataPtr(); settings.path.push_back(Substream::ObjectStructure); auto * stream = settings.getter(settings.path); + settings.path.pop_back(); if (!stream) - throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, "Missing stream for kind of binary serialization"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Missing stream for Object column structure during serialization of binary bulk state prefix"); - auto [tuple_column, tuple_type] = unflattenObjectToTuple(column_object); + /// Write serialization version. + UInt64 serialization_version = ObjectSerializationVersion::Value::BASIC; + writeBinaryLittleEndian(serialization_version, *stream); - writeIntBinary(static_cast(BinarySerializationKind::TUPLE), *stream); - writeStringBinary(tuple_type->getName(), *stream); + auto object_state = std::make_shared(serialization_version); + object_state->max_dynamic_paths = column_object.getMaxDynamicPaths(); + /// Write max_dynamic_paths parameter. + writeVarUInt(object_state->max_dynamic_paths, *stream); + /// Write all dynamic paths in sorted order. + object_state->sorted_dynamic_paths.reserve(dynamic_paths.size()); + for (const auto & [path, _] : dynamic_paths) + object_state->sorted_dynamic_paths.push_back(path); + std::sort(object_state->sorted_dynamic_paths.begin(), object_state->sorted_dynamic_paths.end()); + writeVarUInt(object_state->sorted_dynamic_paths.size(), *stream); + for (const auto & path : object_state->sorted_dynamic_paths) + writeStringBinary(path, *stream); - auto state_object = std::make_shared(); - state_object->nested_type = tuple_type; - state_object->nested_serialization = tuple_type->getDefaultSerialization(); + /// Write statistics in prefix if needed. + if (settings.object_and_dynamic_write_statistics == SerializeBinaryBulkSettings::ObjectAndDynamicStatisticsMode::PREFIX) + { + const auto & statistics = column_object.getStatistics(); + /// First, write statistics for dynamic paths. + for (const auto & path : object_state->sorted_dynamic_paths) + { + size_t number_of_non_null_values = 0; + /// Check if we can use statistics stored in the column. There are 2 possible sources + /// of this statistics: + /// - statistics calculated during merge of some data parts (Statistics::Source::MERGE) + /// - statistics read from the data part during deserialization of Object column (Statistics::Source::READ). + /// We can rely only on statistics calculated during the merge, because column with statistics that was read + /// during deserialization from some data part could be filtered/limited/transformed/etc and so the statistics can be outdated. + if (statistics && statistics->source == ColumnObject::Statistics::Source::MERGE) + number_of_non_null_values = statistics->dynamic_paths_statistics.at(path); + /// Otherwise we can use only path column from current object column. + else + number_of_non_null_values = (dynamic_paths.at(path)->size() - dynamic_paths.at(path)->getNumberOfDefaultRows()); + writeVarUInt(number_of_non_null_values, *stream); + } - settings.path.back() = Substream::ObjectData; - state_object->nested_serialization->serializeBinaryBulkStatePrefix(*tuple_column, settings, state_object->nested_state); + /// Second, write statistics for paths in shared data. + /// Check if we have statistics calculated during merge of some data parts (Statistics::Source::MERGE). + if (statistics && statistics->source == ColumnObject::Statistics::Source::MERGE) + { + writeVarUInt(statistics->shared_data_paths_statistics.size(), *stream); + for (const auto & [path, size] : statistics->shared_data_paths_statistics) + { + writeStringBinary(path, *stream); + writeVarUInt(size, *stream); + } + } + /// If we don't have statistics for shared data from merge, calculate it from the column. + else + { + std::unordered_map shared_data_paths_statistics; + const auto [shared_data_paths, _] = column_object.getSharedDataPathsAndValues(); + for (size_t i = 0; i != shared_data_paths->size(); ++i) + { + auto path = shared_data_paths->getDataAt(i).toView(); + if (auto it = shared_data_paths_statistics.find(path); it != shared_data_paths_statistics.end()) + ++it->second; + else if (shared_data_paths_statistics.size() < ColumnObject::Statistics::MAX_SHARED_DATA_STATISTICS_SIZE) + shared_data_paths_statistics.emplace(path, 1); + } - state = std::move(state_object); - settings.path.pop_back(); -} - -template -void SerializationObject::serializeBinaryBulkStateSuffix( - SerializeBinaryBulkSettings & settings, - SerializeBinaryBulkStatePtr & state) const -{ - checkSerializationIsSupported(settings); - auto * state_object = checkAndGetState(state); + writeVarUInt(shared_data_paths_statistics.size(), *stream); + for (const auto & [path, size] : shared_data_paths_statistics) + { + writeStringBinary(path, *stream); + writeVarUInt(size, *stream); + } + } + } + /// Otherwise statistics will be written in the suffix, in this case we will recalculate + /// statistics during serialization to make it more precise. + else + { + object_state->recalculate_statistics = true; + } settings.path.push_back(Substream::ObjectData); - state_object->nested_serialization->serializeBinaryBulkStateSuffix(settings, state_object->nested_state); + + for (const auto & path : sorted_typed_paths) + { + settings.path.push_back(Substream::ObjectTypedPath); + settings.path.back().object_path_name = path; + typed_path_serializations.at(path)->serializeBinaryBulkStatePrefix(*typed_paths.at(path), settings, object_state->typed_path_states[path]); + settings.path.pop_back(); + } + + for (const auto & path : object_state->sorted_dynamic_paths) + { + settings.path.push_back(Substream::ObjectDynamicPath); + settings.path.back().object_path_name = path; + dynamic_serialization->serializeBinaryBulkStatePrefix(*dynamic_paths.at(path), settings, object_state->dynamic_path_states[path]); + settings.path.pop_back(); + } + + settings.path.push_back(Substream::ObjectSharedData); + shared_data_serialization->serializeBinaryBulkStatePrefix(*shared_data, settings, object_state->shared_data_state); settings.path.pop_back(); + settings.path.pop_back(); + + state = std::move(object_state); } -template -void SerializationObject::deserializeBinaryBulkStatePrefix( +void SerializationObject::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state, SubstreamsDeserializeStatesCache * cache) const { - checkSerializationIsSupported(settings); - if (state) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, - "DataTypeObject doesn't support serialization with non-trivial state"); + auto structure_state = deserializeObjectStructureStatePrefix(settings, cache); + if (!structure_state) + return; - settings.path.push_back(Substream::ObjectStructure); - auto * stream = settings.getter(settings.path); - settings.path.pop_back(); - - if (!stream) - throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, - "Cannot read kind of binary serialization of DataTypeObject, because its stream is missing"); - - UInt8 kind_raw; - readIntBinary(kind_raw, *stream); - auto kind = magic_enum::enum_cast(kind_raw); - if (!kind) - throw Exception(ErrorCodes::INCORRECT_DATA, - "Unknown binary serialization kind of Object: {}", std::to_string(kind_raw)); - - auto state_object = std::make_shared(); - state_object->kind = *kind; - - if (state_object->kind == BinarySerializationKind::TUPLE) - { - String data_type_name; - readStringBinary(data_type_name, *stream); - state_object->nested_type = DataTypeFactory::instance().get(data_type_name); - state_object->nested_serialization = state_object->nested_type->getDefaultSerialization(); - - if (!isTuple(state_object->nested_type)) - throw Exception(ErrorCodes::INCORRECT_DATA, - "Data of type Object should be written as Tuple, got: {}", data_type_name); - } - else if (state_object->kind == BinarySerializationKind::STRING) - { - state_object->nested_type = std::make_shared(); - state_object->nested_serialization = std::make_shared(); - } - else - { - throw Exception(ErrorCodes::INCORRECT_DATA, - "Unknown binary serialization kind of Object: {}", std::to_string(kind_raw)); - } + auto object_state = std::make_shared(); + object_state->structure_state = std::move(structure_state); settings.path.push_back(Substream::ObjectData); - state_object->nested_serialization->deserializeBinaryBulkStatePrefix(settings, state_object->nested_state, cache); + + for (const auto & path : sorted_typed_paths) + { + settings.path.push_back(Substream::ObjectTypedPath); + settings.path.back().object_path_name = path; + typed_path_serializations.at(path)->deserializeBinaryBulkStatePrefix(settings, object_state->typed_path_states[path], cache); + settings.path.pop_back(); + } + + const auto & sorted_dynamic_paths = checkAndGetState(object_state->structure_state)->sorted_dynamic_paths; + for (const auto & path : sorted_dynamic_paths) + { + settings.path.push_back(Substream::ObjectDynamicPath); + settings.path.back().object_path_name = path; + dynamic_serialization->deserializeBinaryBulkStatePrefix(settings, object_state->dynamic_path_states[path], cache); + settings.path.pop_back(); + } + + settings.path.push_back(Substream::ObjectSharedData); + shared_data_serialization->deserializeBinaryBulkStatePrefix(settings, object_state->shared_data_state, cache); + settings.path.pop_back(); settings.path.pop_back(); - state = std::move(state_object); + state = std::move(object_state); } -template -void SerializationObject::serializeBinaryBulkWithMultipleStreams( +ISerialization::DeserializeBinaryBulkStatePtr SerializationObject::deserializeObjectStructureStatePrefix( + DeserializeBinaryBulkSettings & settings, SubstreamsDeserializeStatesCache * cache) +{ + settings.path.push_back(Substream::ObjectStructure); + + DeserializeBinaryBulkStatePtr state = nullptr; + /// Check if we already deserialized this state. It can happen when we read both object column and its subcolumns. + if (auto cached_state = getFromSubstreamsDeserializeStatesCache(cache, settings.path)) + { + state = cached_state; + } + else if (auto * structure_stream = settings.getter(settings.path)) + { + /// Read structure serialization version. + UInt64 serialization_version; + readBinaryLittleEndian(serialization_version, *structure_stream); + auto structure_state = std::make_shared(serialization_version); + /// Read max_dynamic_paths parameter. + readVarUInt(structure_state->max_dynamic_paths, *structure_stream); + /// Read the sorted list of dynamic paths. + size_t dynamic_paths_size; + readVarUInt(dynamic_paths_size, *structure_stream); + structure_state->sorted_dynamic_paths.reserve(dynamic_paths_size); + structure_state->dynamic_paths.reserve(dynamic_paths_size); + for (size_t i = 0; i != dynamic_paths_size; ++i) + { + structure_state->sorted_dynamic_paths.emplace_back(); + readStringBinary(structure_state->sorted_dynamic_paths.back(), *structure_stream); + structure_state->dynamic_paths.insert(structure_state->sorted_dynamic_paths.back()); + } + + /// Read statistics if needed. + if (settings.object_and_dynamic_read_statistics) + { + ColumnObject::Statistics statistics(ColumnObject::Statistics::Source::READ); + statistics.dynamic_paths_statistics.reserve(structure_state->sorted_dynamic_paths.size()); + /// First, read dynamic paths statistics. + for (const auto & path : structure_state->sorted_dynamic_paths) + readVarUInt(statistics.dynamic_paths_statistics[path], *structure_stream); + + /// Second, read shared data paths statistics. + size_t size; + readVarUInt(size, *structure_stream); + statistics.shared_data_paths_statistics.reserve(size); + String path; + for (size_t i = 0; i != size; ++i) + { + readStringBinary(path, *structure_stream); + readVarUInt(statistics.shared_data_paths_statistics[path], *structure_stream); + } + + structure_state->statistics = std::make_shared(std::move(statistics)); + } + + state = std::move(structure_state); + addToSubstreamsDeserializeStatesCache(cache, settings.path, state); + } + + settings.path.pop_back(); + return state; +} + +void SerializationObject::serializeBinaryBulkWithMultipleStreams( const IColumn & column, size_t offset, size_t limit, SerializeBinaryBulkSettings & settings, SerializeBinaryBulkStatePtr & state) const { - checkSerializationIsSupported(settings); const auto & column_object = assert_cast(column); - auto * state_object = checkAndGetState(state); + const auto & typed_paths = column_object.getTypedPaths(); + const auto & dynamic_paths = column_object.getDynamicPaths(); + const auto & shared_data = column_object.getSharedDataPtr(); + auto * object_state = checkAndGetState(state); - if (!column_object.isFinalized()) - { - auto finalized = column_object.cloneFinalized(); - serializeBinaryBulkWithMultipleStreams(*finalized, offset, limit, settings, state); - return; - } + if (column_object.getMaxDynamicPaths() != object_state->max_dynamic_paths) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Mismatch of max_dynamic_paths parameter of Object. Expected: {}, Got: {}", object_state->max_dynamic_paths, column_object.getMaxDynamicPaths()); - auto [tuple_column, tuple_type] = unflattenObjectToTuple(column_object); - - if (!state_object->nested_type->equals(*tuple_type)) - { - throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, - "Types of internal column of Object mismatched. Expected: {}, Got: {}", - state_object->nested_type->getName(), tuple_type->getName()); - } + if (column_object.getDynamicPaths().size() != object_state->sorted_dynamic_paths.size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Mismatch of number of dynamic paths in Object. Expected: {}, Got: {}", object_state->sorted_dynamic_paths.size(), column_object.getDynamicPaths().size()); settings.path.push_back(Substream::ObjectData); - if (auto * stream = settings.getter(settings.path)) + + for (const auto & path : sorted_typed_paths) { - state_object->nested_serialization->serializeBinaryBulkWithMultipleStreams( - *tuple_column, offset, limit, settings, state_object->nested_state); + settings.path.push_back(Substream::ObjectTypedPath); + settings.path.back().object_path_name = path; + typed_path_serializations.at(path)->serializeBinaryBulkWithMultipleStreams(*typed_paths.at(path), offset, limit, settings, object_state->typed_path_states[path]); + settings.path.pop_back(); } + const auto * dynamic_serialization_typed = assert_cast(dynamic_serialization.get()); + for (const auto & path : object_state->sorted_dynamic_paths) + { + settings.path.push_back(Substream::ObjectDynamicPath); + settings.path.back().object_path_name = path; + auto it = dynamic_paths.find(path); + if (it == dynamic_paths.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Dynamic structure mismatch for Object column: dynamic path '{}' is not found in the column", path); + if (object_state->recalculate_statistics) + { + size_t number_of_non_null_values = 0; + dynamic_serialization_typed->serializeBinaryBulkWithMultipleStreamsAndCountTotalSizeOfVariants(*it->second, offset, limit, settings, object_state->dynamic_path_states[path], number_of_non_null_values); + object_state->statistics.dynamic_paths_statistics[path] += number_of_non_null_values; + } + else + { + dynamic_serialization_typed->serializeBinaryBulkWithMultipleStreams(*it->second, offset, limit, settings, object_state->dynamic_path_states[path]); + } + settings.path.pop_back(); + } + + settings.path.push_back(Substream::ObjectSharedData); + shared_data_serialization->serializeBinaryBulkWithMultipleStreams(*shared_data, offset, limit, settings, object_state->shared_data_state); + if (object_state->recalculate_statistics) + { + /// Calculate statistics for paths in shared data. + const auto [shared_data_paths, _] = column_object.getSharedDataPathsAndValues(); + const auto & shared_data_offsets = column_object.getSharedDataOffsets(); + size_t start = shared_data_offsets[offset - 1]; + size_t end = limit == 0 || offset + limit > shared_data_offsets.size() ? shared_data_paths->size() : shared_data_offsets[offset + limit - 1]; + for (size_t i = start; i != end; ++i) + { + auto path = shared_data_paths->getDataAt(i).toView(); + if (auto it = object_state->statistics.shared_data_paths_statistics.find(path); it != object_state->statistics.shared_data_paths_statistics.end()) + ++it->second; + else if (object_state->statistics.shared_data_paths_statistics.size() < ColumnObject::Statistics::MAX_SHARED_DATA_STATISTICS_SIZE) + object_state->statistics.shared_data_paths_statistics.emplace(path, 1); + } + } + settings.path.pop_back(); settings.path.pop_back(); } -template -void SerializationObject::deserializeBinaryBulkWithMultipleStreams( +void SerializationObject::serializeBinaryBulkStateSuffix( + SerializeBinaryBulkSettings & settings, SerializeBinaryBulkStatePtr & state) const +{ + auto * object_state = checkAndGetState(state); + settings.path.push_back(Substream::ObjectStructure); + auto * stream = settings.getter(settings.path); + settings.path.pop_back(); + + if (!stream) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Missing stream for Object column structure during serialization of binary bulk state suffix"); + + /// Write statistics in suffix if needed. + if (settings.object_and_dynamic_write_statistics == SerializeBinaryBulkSettings::ObjectAndDynamicStatisticsMode::SUFFIX) + { + /// First, write dynamic paths statistics. + for (const auto & path : object_state->sorted_dynamic_paths) + writeVarUInt(object_state->statistics.dynamic_paths_statistics[path], *stream); + + /// Second, write shared data paths statistics. + writeVarUInt(object_state->statistics.shared_data_paths_statistics.size(), *stream); + for (const auto & [path, size] : object_state->statistics.shared_data_paths_statistics) + { + writeStringBinary(path, *stream); + writeVarUInt(size, *stream); + } + } + + settings.path.push_back(Substream::ObjectData); + + for (const auto & path : sorted_typed_paths) + { + settings.path.push_back(Substream::ObjectTypedPath); + settings.path.back().object_path_name = path; + typed_path_serializations.at(path)->serializeBinaryBulkStateSuffix(settings, object_state->typed_path_states[path]); + settings.path.pop_back(); + } + + for (const auto & path : object_state->sorted_dynamic_paths) + { + settings.path.push_back(Substream::ObjectDynamicPath); + settings.path.back().object_path_name = path; + dynamic_serialization->serializeBinaryBulkStateSuffix(settings, object_state->dynamic_path_states[path]); + settings.path.pop_back(); + } + + settings.path.push_back(Substream::ObjectSharedData); + shared_data_serialization->serializeBinaryBulkStateSuffix(settings, object_state->shared_data_state); + settings.path.pop_back(); + settings.path.pop_back(); +} + +void SerializationObject::deserializeBinaryBulkWithMultipleStreams( ColumnPtr & column, size_t limit, DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state, SubstreamsCache * cache) const { - checkSerializationIsSupported(settings); - if (!column->empty()) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, - "DataTypeObject cannot be deserialized to non-empty column"); + if (!state) + return; + auto * object_state = checkAndGetState(state); + auto * structure_state = checkAndGetState(object_state->structure_state); auto mutable_column = column->assumeMutable(); auto & column_object = assert_cast(*mutable_column); - auto * state_object = checkAndGetState(state); + /// If it's a new object column, set dynamic paths and statistics. + if (column_object.empty()) + { + column_object.setMaxDynamicPaths(structure_state->max_dynamic_paths); + column_object.setDynamicPaths(structure_state->sorted_dynamic_paths); + column_object.setStatistics(structure_state->statistics); + } + + auto & typed_paths = column_object.getTypedPaths(); + auto & dynamic_paths = column_object.getDynamicPaths(); + auto & shared_data = column_object.getSharedDataPtr(); settings.path.push_back(Substream::ObjectData); - if (state_object->kind == BinarySerializationKind::STRING) - deserializeBinaryBulkFromString(column_object, limit, settings, *state_object, cache); - else - deserializeBinaryBulkFromTuple(column_object, limit, settings, *state_object, cache); + for (const auto & path : sorted_typed_paths) + { + settings.path.push_back(Substream::ObjectTypedPath); + settings.path.back().object_path_name = path; + typed_path_serializations.at(path)->deserializeBinaryBulkWithMultipleStreams(typed_paths[path], limit, settings, object_state->typed_path_states[path], cache); + settings.path.pop_back(); + } + for (const auto & path : structure_state->sorted_dynamic_paths) + { + settings.path.push_back(Substream::ObjectDynamicPath); + settings.path.back().object_path_name = path; + dynamic_serialization->deserializeBinaryBulkWithMultipleStreams(dynamic_paths[path], limit, settings, object_state->dynamic_path_states[path], cache); + settings.path.pop_back(); + } + + settings.path.push_back(Substream::ObjectSharedData); + shared_data_serialization->deserializeBinaryBulkWithMultipleStreams(shared_data, limit, settings, object_state->shared_data_state, cache); + settings.path.pop_back(); settings.path.pop_back(); - column_object.checkConsistency(); - column_object.finalize(); - column = std::move(mutable_column); } -template -void SerializationObject::deserializeBinaryBulkFromString( - ColumnObject & column_object, - size_t limit, - DeserializeBinaryBulkSettings & settings, - DeserializeStateObject & state, - SubstreamsCache * cache) const +void SerializationObject::serializeBinary(const Field & field, WriteBuffer & ostr, const DB::FormatSettings & settings) const { - ColumnPtr column_string = state.nested_type->createColumn(); - state.nested_serialization->deserializeBinaryBulkWithMultipleStreams( - column_string, limit, settings, state.nested_state, cache); - - size_t input_rows_count = column_string->size(); - column_object.reserve(input_rows_count); - - FormatSettings format_settings; - for (size_t i = 0; i < input_rows_count; ++i) + const auto & object = field.safeGet(); + /// Serialize number of paths and then pairs (path, value). + writeVarUInt(object.size(), ostr); + for (const auto & [path, value] : object) { - const auto & val = column_string->getDataAt(i); - ReadBufferFromMemory read_buffer(val.data, val.size); - deserializeWholeText(column_object, read_buffer, format_settings); - - if (!read_buffer.eof()) - throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, - "Cannot parse string to column Object. Expected eof"); - } -} - -template -void SerializationObject::deserializeBinaryBulkFromTuple( - ColumnObject & column_object, - size_t limit, - DeserializeBinaryBulkSettings & settings, - DeserializeStateObject & state, - SubstreamsCache * cache) const -{ - ColumnPtr column_tuple = state.nested_type->createColumn(); - state.nested_serialization->deserializeBinaryBulkWithMultipleStreams( - column_tuple, limit, settings, state.nested_state, cache); - - auto [tuple_paths, tuple_types] = flattenTuple(state.nested_type); - auto flattened_tuple = flattenTuple(column_tuple); - const auto & tuple_columns = assert_cast(*flattened_tuple).getColumns(); - - assert(tuple_paths.size() == tuple_types.size()); - size_t num_subcolumns = tuple_paths.size(); - - if (tuple_columns.size() != num_subcolumns) - throw Exception(ErrorCodes::INCORRECT_DATA, - "Inconsistent type ({}) and column ({}) while reading column of type Object", - state.nested_type->getName(), column_tuple->getName()); - - for (size_t i = 0; i < num_subcolumns; ++i) - column_object.addSubcolumn(tuple_paths[i], tuple_columns[i]->assumeMutable()); -} - -template -void SerializationObject::serializeBinary(const Field &, WriteBuffer &, const FormatSettings &) const -{ - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented for SerializationObject"); -} - -template -void SerializationObject::deserializeBinary(Field &, ReadBuffer &, const FormatSettings &) const -{ - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented for SerializationObject"); -} - -template -void SerializationObject::serializeBinary(const IColumn &, size_t, WriteBuffer &, const FormatSettings &) const -{ - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented for SerializationObject"); -} - -template -void SerializationObject::deserializeBinary(IColumn &, ReadBuffer &, const FormatSettings &) const -{ - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented for SerializationObject"); -} - -/// TODO: use format different of JSON in serializations. - -template -void SerializationObject::serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const -{ - const auto & column_object = assert_cast(column); - const auto & subcolumns = column_object.getSubcolumns(); - - writeChar('{', ostr); - for (auto it = subcolumns.begin(); it != subcolumns.end(); ++it) - { - const auto & entry = *it; - if (it != subcolumns.begin()) - writeCString(",", ostr); - - writeDoubleQuoted(entry->path.getPath(), ostr); - writeChar(':', ostr); - serializeTextFromSubcolumn(entry->data, row_num, ostr, settings); - } - writeChar('}', ostr); -} - -template -template -void SerializationObject::serializeTextFromSubcolumn( - const ColumnObject::Subcolumn & subcolumn, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent) const -{ - const auto & least_common_type = subcolumn.getLeastCommonType(); - - if (subcolumn.isFinalized()) - { - const auto & finalized_column = subcolumn.getFinalizedColumn(); - auto info = least_common_type->getSerializationInfo(finalized_column); - auto serialization = least_common_type->getSerialization(*info); - if constexpr (pretty_json) - serialization->serializeTextJSONPretty(finalized_column, row_num, ostr, settings, indent); + writeStringBinary(path, ostr); + if (auto it = typed_path_serializations.find(path); it != typed_path_serializations.end()) + it->second->serializeBinary(value, ostr, settings); else - serialization->serializeTextJSON(finalized_column, row_num, ostr, settings); - return; + dynamic_serialization->serializeBinary(value, ostr, settings); + } +} + +void SerializationObject::serializeBinary(const IColumn & col, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + const auto & column_object = assert_cast(col); + const auto & typed_paths = column_object.getTypedPaths(); + const auto & dynamic_paths = column_object.getDynamicPaths(); + const auto & shared_data_offsets = column_object.getSharedDataOffsets(); + size_t offset = shared_data_offsets[ssize_t(row_num) - 1]; + size_t end = shared_data_offsets[ssize_t(row_num)]; + + /// Serialize number of paths and then pairs (path, value). + writeVarUInt(typed_paths.size() + dynamic_paths.size() + (end - offset), ostr); + + for (const auto & [path, column] : typed_paths) + { + writeStringBinary(path, ostr); + typed_path_serializations.at(path)->serializeBinary(*column, row_num, ostr, settings); } - size_t ind = row_num; - if (ind < subcolumn.getNumberOfDefaultsInPrefix()) + for (const auto & [path, column] : dynamic_paths) { - /// Suboptimal, but it should happen rarely. - auto tmp_column = subcolumn.getLeastCommonType()->createColumn(); - tmp_column->insertDefault(); - - auto info = least_common_type->getSerializationInfo(*tmp_column); - auto serialization = least_common_type->getSerialization(*info); - if constexpr (pretty_json) - serialization->serializeTextJSONPretty(*tmp_column, 0, ostr, settings, indent); - else - serialization->serializeTextJSON(*tmp_column, 0, ostr, settings); - return; + writeStringBinary(path, ostr); + dynamic_serialization->serializeBinary(*column, row_num, ostr, settings); } - ind -= subcolumn.getNumberOfDefaultsInPrefix(); - for (const auto & part : subcolumn.getData()) + const auto [shared_data_paths, shared_data_values] = column_object.getSharedDataPathsAndValues(); + for (size_t i = offset; i != end; ++i) { - if (ind < part->size()) + writeStringBinary(shared_data_paths->getDataAt(i), ostr); + auto value = shared_data_values->getDataAt(i); + ostr.write(value.data, value.size); + } +} + +void SerializationObject::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const +{ + Object object; + size_t number_of_paths; + readVarUInt(number_of_paths, istr); + /// Read pairs (path, value). + for (size_t i = 0; i != number_of_paths; ++i) + { + String path; + readStringBinary(path, istr); + if (!shouldSkipPath(path)) { - auto part_type = getDataTypeByColumn(*part); - auto info = part_type->getSerializationInfo(*part); - auto serialization = part_type->getSerialization(*info); - if constexpr (pretty_json) - serialization->serializeTextJSONPretty(*part, ind, ostr, settings, indent); + if (auto it = typed_path_serializations.find(path); it != typed_path_serializations.end()) + it->second->deserializeBinary(object[path], istr, settings); else - serialization->serializeTextJSON(*part, ind, ostr, settings); - return; + dynamic_serialization->deserializeBinary(object[path], istr, settings); + } + else + { + /// Skip value of this path. + Field tmp; + dynamic_serialization->deserializeBinary(tmp, istr, settings); + } + } + + field = std::move(object); +} + +/// Restore column object to the state with previous size. +/// We can use it in case of an exception during deserialization. +void SerializationObject::restoreColumnObject(ColumnObject & column_object, size_t prev_size) +{ + auto & typed_paths = column_object.getTypedPaths(); + auto & dynamic_paths = column_object.getDynamicPaths(); + auto [shared_data_paths, shared_data_values] = column_object.getSharedDataPathsAndValues(); + auto & shared_data_offsets = column_object.getSharedDataOffsets(); + + for (auto & [_, column] : typed_paths) + { + if (column->size() > prev_size) + column->popBack(column->size() - prev_size); + } + + for (auto & [_, column] : dynamic_paths) + { + if (column->size() > prev_size) + column->popBack(column->size() - prev_size); + } + + if (shared_data_offsets.size() > prev_size) + shared_data_offsets.resize(prev_size); + size_t prev_shared_data_offset = shared_data_offsets.back(); + if (shared_data_paths->size() > prev_shared_data_offset) + shared_data_paths->popBack(shared_data_paths->size() - prev_shared_data_offset); + if (shared_data_values->size() > prev_shared_data_offset) + shared_data_values->popBack(shared_data_values->size() - prev_shared_data_offset); +} + +void SerializationObject::deserializeBinary(IColumn & col, ReadBuffer & istr, const FormatSettings & settings) const +{ + auto & column_object = assert_cast(col); + auto & typed_paths = column_object.getTypedPaths(); + auto & dynamic_paths = column_object.getDynamicPaths(); + auto [shared_data_paths, shared_data_values] = column_object.getSharedDataPathsAndValues(); + auto & shared_data_offsets = column_object.getSharedDataOffsets(); + + size_t number_of_paths; + readVarUInt(number_of_paths, istr); + std::vector> paths_and_values_for_shared_data; + size_t prev_size = column_object.size(); + try + { + /// Read pairs (path, value). + for (size_t i = 0; i != number_of_paths; ++i) + { + String path; + readStringBinary(path, istr); + if (!shouldSkipPath(path)) + { + /// Check if we have this path in typed paths. + if (auto typed_it = typed_path_serializations.find(path); typed_it != typed_path_serializations.end()) + { + auto & typed_column = typed_paths[path]; + /// Check if we already had this path. + if (typed_column->size() > prev_size) + { + if (!settings.json.type_json_skip_duplicated_paths) + throw Exception(ErrorCodes::INCORRECT_DATA, "Found duplicated path during binary deserialization of JSON type: {}. You can enable setting type_json_skip_duplicated_paths to skip duplicated paths during insert", path); + } + else + { + typed_it->second->deserializeBinary(*typed_column, istr, settings); + } + } + /// Check if we have this path in dynamic paths. + else if (auto dynamic_it = dynamic_paths.find(path); dynamic_it != dynamic_paths.end()) + { + /// Check if we already had this path. + if (dynamic_it->second->size() > prev_size) + { + if (!settings.json.type_json_skip_duplicated_paths) + throw Exception(ErrorCodes::INCORRECT_DATA, "Found duplicated path during binary deserialization of JSON type: {}. You can enable setting type_json_skip_duplicated_paths to skip duplicated paths during insert", path); + } + + dynamic_serialization->deserializeBinary(*dynamic_it->second, istr, settings); + } + /// Try to add a new dynamic paths. + else if (auto * dynamic_column = column_object.tryToAddNewDynamicPath(path)) + { + dynamic_serialization->deserializeBinary(*dynamic_column, istr, settings); + } + /// Otherwise this path should go to shared data. + else + { + auto tmp_dynamic_column = ColumnDynamic::create(); + tmp_dynamic_column->reserve(1); + String value; + readParsedValueIntoString(value, istr, [&](ReadBuffer & buf){ dynamic_serialization->deserializeBinary(*tmp_dynamic_column, buf, settings); }); + paths_and_values_for_shared_data.emplace_back(std::move(path), std::move(value)); + } + } + else + { + /// Skip value of this path. + Field tmp; + dynamic_serialization->deserializeBinary(tmp, istr, settings); + } } - ind -= part->size(); + std::sort(paths_and_values_for_shared_data.begin(), paths_and_values_for_shared_data.end()); + for (size_t i = 0; i != paths_and_values_for_shared_data.size(); ++i) + { + const auto & [path, value] = paths_and_values_for_shared_data[i]; + if (i != 0 && path == paths_and_values_for_shared_data[i - 1].first) + { + if (!settings.json.type_json_skip_duplicated_paths) + throw Exception(ErrorCodes::INCORRECT_DATA, "Found duplicated path during binary deserialization of JSON type: {}. You can enable setting type_json_skip_duplicated_paths to skip duplicated paths during insert", path); + } + else + { + shared_data_paths->insertData(path.data(), path.size()); + shared_data_values->insertData(value.data(), value.size()); + } + } + shared_data_offsets.push_back(shared_data_paths->size()); } - - throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Index ({}) for text serialization is out of range", row_num); -} - -template -void SerializationObject::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const -{ - serializeTextImpl(column, row_num, ostr, settings); -} - -template -void SerializationObject::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const -{ - WriteBufferFromOwnString ostr_str; - serializeTextImpl(column, row_num, ostr_str, settings); - writeEscapedString(ostr_str.str(), ostr); -} - -template -void SerializationObject::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const -{ - WriteBufferFromOwnString ostr_str; - serializeTextImpl(column, row_num, ostr_str, settings); - writeQuotedString(ostr_str.str(), ostr); -} - -template -void SerializationObject::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const -{ - serializeTextImpl(column, row_num, ostr, settings); -} - -template -void SerializationObject::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const -{ - WriteBufferFromOwnString ostr_str; - serializeTextImpl(column, row_num, ostr_str, settings); - writeCSVString(ostr_str.str(), ostr); -} - -template -void SerializationObject::serializeTextMarkdown( - const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const -{ - if (settings.markdown.escape_special_characters) + catch (...) { - WriteBufferFromOwnString ostr_str; - serializeTextImpl(column, row_num, ostr_str, settings); - writeMarkdownEscapedString(ostr_str.str(), ostr); + restoreColumnObject(column_object, prev_size); + throw; } - else + + /// Insert default to all remaining typed and dynamic paths. + for (auto & [_, column] : typed_paths) { - serializeTextEscaped(column, row_num, ostr, settings); + if (column->size() == prev_size) + column->insertDefault(); + } + + for (auto & [_, column] : column_object.getDynamicPathsPtrs()) + { + if (column->size() == prev_size) + column->insertDefault(); } } -template -void SerializationObject::serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent) const +SerializationPtr SerializationObject::TypedPathSubcolumnCreator::create(const DB::SerializationPtr & prev) const { - const auto & column_object = assert_cast(column); - const auto & subcolumns = column_object.getSubcolumns(); - - writeCString("{\n", ostr); - for (auto it = subcolumns.begin(); it != subcolumns.end(); ++it) - { - const auto & entry = *it; - if (it != subcolumns.begin()) - writeCString(",\n", ostr); - - writeChar(' ', (indent + 1) * 4, ostr); - writeDoubleQuoted(entry->path.getPath(), ostr); - writeCString(": ", ostr); - serializeTextFromSubcolumn(entry->data, row_num, ostr, settings, indent + 1); - } - writeChar('\n', ostr); - writeChar(' ', indent * 4, ostr); - writeChar('}', ostr); -} - - -SerializationPtr getObjectSerialization(const String & schema_format) -{ - if (schema_format == "json") - { -#if USE_SIMDJSON - return std::make_shared>>(); -#elif USE_RAPIDJSON - return std::make_shared>>(); -#else - throw Exception(ErrorCodes::NOT_IMPLEMENTED, - "To use data type Object with JSON format ClickHouse should be built with Simdjson or Rapidjson"); -#endif - } - - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unknown schema format '{}'", schema_format); + return std::make_shared(prev, path); } } + diff --git a/src/DataTypes/Serializations/SerializationObject.h b/src/DataTypes/Serializations/SerializationObject.h index 4cb7d0ab6a8..62ff9849f45 100644 --- a/src/DataTypes/Serializations/SerializationObject.h +++ b/src/DataTypes/Serializations/SerializationObject.h @@ -1,34 +1,43 @@ #pragma once #include -#include -#include +#include +#include namespace DB { -/** Serialization for data type Object. - * Supported only text serialization/deserialization. - * and binary bulk serialization/deserialization without position independent - * encoding, i.e. serialization/deserialization into Native format. - */ -template +class SerializationObjectDynamicPath; +class SerializationSubObject; + +/// Class for binary serialization/deserialization of an Object type (currently only JSON). class SerializationObject : public ISerialization { public: - /** In Native format ColumnObject can be serialized - * in two formats: as Tuple or as String. - * The format is the following: - * - * 1 byte -- 0 if Tuple, 1 if String. - * [type_name] -- Only for tuple serialization. - * ... data of internal column ... - * - * ClickHouse client serializazes objects as tuples. - * String serialization exists for clients, which cannot - * do parsing by themselves and they can send raw data as - * string. It will be parsed on the server side. - */ + /// Serialization can change in future. Let's introduce serialization version. + struct ObjectSerializationVersion + { + enum Value + { + BASIC = 0, + }; + + Value value; + + static void checkVersion(UInt64 version); + + explicit ObjectSerializationVersion(UInt64 version); + }; + + SerializationObject( + std::unordered_map typed_path_serializations_, + const std::unordered_set & paths_to_skip_, + const std::vector & path_regexps_to_skip_); + + void enumerateStreams( + EnumerateStreamsSettings & settings, + const StreamCallback & callback, + const SubstreamData & data) const override; void serializeBinaryBulkStatePrefix( const IColumn & column, @@ -63,59 +72,55 @@ public: void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; - void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; - void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; - void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; - void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; - void serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent) const override; - void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; - void serializeTextMarkdown(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; - - void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; - void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; - void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; - void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; - void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + static void restoreColumnObject(ColumnObject & column_object, size_t prev_size); private: - enum class BinarySerializationKind : UInt8 + friend SerializationObjectDynamicPath; + friend SerializationSubObject; + + /// State of an Object structure. Can be also used during deserializing of Object subcolumns. + struct DeserializeBinaryBulkStateObjectStructure : public ISerialization::DeserializeBinaryBulkState { - TUPLE = 0, - STRING = 1, + ObjectSerializationVersion structure_version; + size_t max_dynamic_paths; + std::vector sorted_dynamic_paths; + std::unordered_set dynamic_paths; + /// Paths statistics. Map (dynamic path) -> (number of non-null values in this path). + ColumnObject::StatisticsPtr statistics; + + explicit DeserializeBinaryBulkStateObjectStructure(UInt64 structure_version_) : structure_version(structure_version_) {} }; - struct SerializeStateObject; - struct DeserializeStateObject; - - void deserializeBinaryBulkFromString( - ColumnObject & column_object, - size_t limit, + static DeserializeBinaryBulkStatePtr deserializeObjectStructureStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeStateObject & state, - SubstreamsCache * cache) const; + SubstreamsDeserializeStatesCache * cache); - void deserializeBinaryBulkFromTuple( - ColumnObject & column_object, - size_t limit, - DeserializeBinaryBulkSettings & settings, - DeserializeStateObject & state, - SubstreamsCache * cache) const; + /// Shared data has type Array(Tuple(String, String)). + static const DataTypePtr & getTypeOfSharedData(); - template - void checkSerializationIsSupported(const TSettings & settings) const; + struct TypedPathSubcolumnCreator : public ISubcolumnCreator + { + String path; - template - void deserializeTextImpl(IColumn & column, Reader && reader) const; + explicit TypedPathSubcolumnCreator(const String & path_) : path(path_) {} - void serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const; + DataTypePtr create(const DataTypePtr & prev) const override { return prev; } + ColumnPtr create(const ColumnPtr & prev) const override { return prev; } + SerializationPtr create(const SerializationPtr & prev) const override; + }; - template - void serializeTextFromSubcolumn(const ColumnObject::Subcolumn & subcolumn, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent = 0) const; +protected: + bool shouldSkipPath(const String & path) const; - /// Pool of parser objects to make SerializationObject thread safe. - mutable SimpleObjectPool parsers_pool; + std::unordered_map typed_path_serializations; + std::unordered_set paths_to_skip; + std::vector sorted_paths_to_skip; + std::list path_regexps_to_skip; + SerializationPtr dynamic_serialization; + +private: + std::vector sorted_typed_paths; + SerializationPtr shared_data_serialization; }; -SerializationPtr getObjectSerialization(const String & schema_format); - } diff --git a/src/DataTypes/Serializations/SerializationObjectDeprecated.cpp b/src/DataTypes/Serializations/SerializationObjectDeprecated.cpp new file mode 100644 index 00000000000..4e9ebf6c03d --- /dev/null +++ b/src/DataTypes/Serializations/SerializationObjectDeprecated.cpp @@ -0,0 +1,586 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; + extern const int INCORRECT_DATA; + extern const int CANNOT_READ_ALL_DATA; + extern const int ARGUMENT_OUT_OF_BOUND; + extern const int CANNOT_PARSE_TEXT; + extern const int EXPERIMENTAL_FEATURE_ERROR; +} + +template +template +void SerializationObjectDeprecated::deserializeTextImpl(IColumn & column, Reader && reader) const +{ + auto & column_object = assert_cast(column); + + String buf; + reader(buf); + std::optional result; + + /// Treat empty string as an empty object + /// for better CAST from String to Object. + if (!buf.empty()) + { + auto parser = parsers_pool.get([] { return new Parser; }); + result = parser->parse(buf.data(), buf.size()); + } + else + { + result = ParseResult{}; + } + + if (!result) + throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse object"); + + auto & [paths, values] = *result; + assert(paths.size() == values.size()); + + size_t old_column_size = column_object.size(); + for (size_t i = 0; i < paths.size(); ++i) + { + auto field_info = getFieldInfo(values[i]); + if (field_info.need_fold_dimension) + values[i] = applyVisitor(FieldVisitorFoldDimension(field_info.num_dimensions), std::move(values[i])); + if (isNothing(field_info.scalar_type)) + continue; + + if (!column_object.hasSubcolumn(paths[i])) + { + if (paths[i].hasNested()) + column_object.addNestedSubcolumn(paths[i], field_info, old_column_size); + else + column_object.addSubcolumn(paths[i], old_column_size); + } + + auto & subcolumn = column_object.getSubcolumn(paths[i]); + assert(subcolumn.size() == old_column_size); + + subcolumn.insert(std::move(values[i]), std::move(field_info)); + } + + /// Insert default values to missed subcolumns. + const auto & subcolumns = column_object.getSubcolumns(); + for (const auto & entry : subcolumns) + { + if (entry->data.size() == old_column_size) + { + bool inserted = column_object.tryInsertDefaultFromNested(entry); + if (!inserted) + entry->data.insertDefault(); + } + } + + column_object.incrementNumRows(); +} + +template +void SerializationObjectDeprecated::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +{ + deserializeTextImpl(column, [&](String & s) { readStringInto(s, istr); }); +} + +template +void SerializationObjectDeprecated::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + deserializeTextImpl(column, [&](String & s) { settings.tsv.crlf_end_of_line_input ? readEscapedStringCRLF(s, istr) : readEscapedString(s, istr); }); +} + +template +void SerializationObjectDeprecated::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +{ + deserializeTextImpl(column, [&](String & s) { readQuotedStringInto(s, istr); }); +} + +template +void SerializationObjectDeprecated::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +{ + deserializeTextImpl(column, [&](String & s) { Parser::readJSON(s, istr); }); +} + +template +void SerializationObjectDeprecated::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + deserializeTextImpl(column, [&](String & s) { readCSVStringInto(s, istr, settings.csv); }); +} + +template +template +void SerializationObjectDeprecated::checkSerializationIsSupported(const TSettings & settings) const +{ + if (settings.position_independent_encoding) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "DataTypeObject doesn't support serialization with position independent encoding"); +} + +template +struct SerializationObjectDeprecated::SerializeStateObject : public ISerialization::SerializeBinaryBulkState +{ + DataTypePtr nested_type; + SerializationPtr nested_serialization; + SerializeBinaryBulkStatePtr nested_state; +}; + +template +struct SerializationObjectDeprecated::DeserializeStateObject : public ISerialization::DeserializeBinaryBulkState +{ + BinarySerializationKind kind; + DataTypePtr nested_type; + SerializationPtr nested_serialization; + DeserializeBinaryBulkStatePtr nested_state; +}; + +template +void SerializationObjectDeprecated::serializeBinaryBulkStatePrefix( + const IColumn & column, + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const +{ + checkSerializationIsSupported(settings); + if (state) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "DataTypeObject doesn't support serialization with non-trivial state"); + + const auto & column_object = assert_cast(column); + if (!column_object.isFinalized()) + { + auto finalized = column_object.cloneFinalized(); + serializeBinaryBulkStatePrefix(*finalized, settings, state); + return; + } + + settings.path.push_back(Substream::DeprecatedObjectStructure); + auto * stream = settings.getter(settings.path); + + if (!stream) + throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, "Missing stream for kind of binary serialization"); + + auto [tuple_column, tuple_type] = unflattenObjectToTuple(column_object); + + writeIntBinary(static_cast(BinarySerializationKind::TUPLE), *stream); + writeStringBinary(tuple_type->getName(), *stream); + + auto state_object = std::make_shared(); + state_object->nested_type = tuple_type; + state_object->nested_serialization = tuple_type->getDefaultSerialization(); + + settings.path.back() = Substream::DeprecatedObjectData; + state_object->nested_serialization->serializeBinaryBulkStatePrefix(*tuple_column, settings, state_object->nested_state); + + state = std::move(state_object); + settings.path.pop_back(); +} + +template +void SerializationObjectDeprecated::serializeBinaryBulkStateSuffix( + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const +{ + checkSerializationIsSupported(settings); + auto * state_object = checkAndGetState(state); + + settings.path.push_back(Substream::DeprecatedObjectData); + state_object->nested_serialization->serializeBinaryBulkStateSuffix(settings, state_object->nested_state); + settings.path.pop_back(); +} + +template +void SerializationObjectDeprecated::deserializeBinaryBulkStatePrefix( + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const +{ + checkSerializationIsSupported(settings); + if (state) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "DataTypeObject doesn't support serialization with non-trivial state"); + + settings.path.push_back(Substream::DeprecatedObjectStructure); + auto * stream = settings.getter(settings.path); + settings.path.pop_back(); + + if (!stream) + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, + "Cannot read kind of binary serialization of DataTypeObject, because its stream is missing"); + + UInt8 kind_raw; + readIntBinary(kind_raw, *stream); + auto kind = magic_enum::enum_cast(kind_raw); + if (!kind) + throw Exception(ErrorCodes::INCORRECT_DATA, + "Unknown binary serialization kind of Object: {}", std::to_string(kind_raw)); + + auto state_object = std::make_shared(); + state_object->kind = *kind; + + if (state_object->kind == BinarySerializationKind::TUPLE) + { + String data_type_name; + readStringBinary(data_type_name, *stream); + state_object->nested_type = DataTypeFactory::instance().get(data_type_name); + state_object->nested_serialization = state_object->nested_type->getDefaultSerialization(); + + if (!isTuple(state_object->nested_type)) + throw Exception(ErrorCodes::INCORRECT_DATA, + "Data of type Object should be written as Tuple, got: {}", data_type_name); + } + else if (state_object->kind == BinarySerializationKind::STRING) + { + state_object->nested_type = std::make_shared(); + state_object->nested_serialization = std::make_shared(); + } + else + { + throw Exception(ErrorCodes::INCORRECT_DATA, + "Unknown binary serialization kind of Object: {}", std::to_string(kind_raw)); + } + + settings.path.push_back(Substream::DeprecatedObjectData); + state_object->nested_serialization->deserializeBinaryBulkStatePrefix(settings, state_object->nested_state, cache); + settings.path.pop_back(); + + state = std::move(state_object); +} + +template +void SerializationObjectDeprecated::serializeBinaryBulkWithMultipleStreams( + const IColumn & column, + size_t offset, + size_t limit, + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const +{ + checkSerializationIsSupported(settings); + const auto & column_object = assert_cast(column); + auto * state_object = checkAndGetState(state); + + if (!column_object.isFinalized()) + { + auto finalized = column_object.cloneFinalized(); + serializeBinaryBulkWithMultipleStreams(*finalized, offset, limit, settings, state); + return; + } + + auto [tuple_column, tuple_type] = unflattenObjectToTuple(column_object); + + if (!state_object->nested_type->equals(*tuple_type)) + { + throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, + "Types of internal column of Object mismatched. Expected: {}, Got: {}", + state_object->nested_type->getName(), tuple_type->getName()); + } + + settings.path.push_back(Substream::DeprecatedObjectData); + if (auto * stream = settings.getter(settings.path)) + { + state_object->nested_serialization->serializeBinaryBulkWithMultipleStreams( + *tuple_column, offset, limit, settings, state_object->nested_state); + } + + settings.path.pop_back(); +} + +template +void SerializationObjectDeprecated::deserializeBinaryBulkWithMultipleStreams( + ColumnPtr & column, + size_t limit, + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsCache * cache) const +{ + checkSerializationIsSupported(settings); + if (!column->empty()) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "DataTypeObject cannot be deserialized to non-empty column"); + + auto mutable_column = column->assumeMutable(); + auto & column_object = assert_cast(*mutable_column); + auto * state_object = checkAndGetState(state); + + settings.path.push_back(Substream::DeprecatedObjectData); + if (state_object->kind == BinarySerializationKind::STRING) + deserializeBinaryBulkFromString(column_object, limit, settings, *state_object, cache); + else + deserializeBinaryBulkFromTuple(column_object, limit, settings, *state_object, cache); + + settings.path.pop_back(); + column_object.checkConsistency(); + column_object.finalize(); + column = std::move(mutable_column); +} + +template +void SerializationObjectDeprecated::deserializeBinaryBulkFromString( + ColumnObjectDeprecated & column_object, + size_t limit, + DeserializeBinaryBulkSettings & settings, + DeserializeStateObject & state, + SubstreamsCache * cache) const +{ + ColumnPtr column_string = state.nested_type->createColumn(); + state.nested_serialization->deserializeBinaryBulkWithMultipleStreams( + column_string, limit, settings, state.nested_state, cache); + + size_t input_rows_count = column_string->size(); + column_object.reserve(input_rows_count); + + FormatSettings format_settings; + for (size_t i = 0; i < input_rows_count; ++i) + { + const auto & val = column_string->getDataAt(i); + ReadBufferFromMemory read_buffer(val.data, val.size); + deserializeWholeText(column_object, read_buffer, format_settings); + + if (!read_buffer.eof()) + throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, + "Cannot parse string to column Object. Expected eof"); + } +} + +template +void SerializationObjectDeprecated::deserializeBinaryBulkFromTuple( + ColumnObjectDeprecated & column_object, + size_t limit, + DeserializeBinaryBulkSettings & settings, + DeserializeStateObject & state, + SubstreamsCache * cache) const +{ + ColumnPtr column_tuple = state.nested_type->createColumn(); + state.nested_serialization->deserializeBinaryBulkWithMultipleStreams( + column_tuple, limit, settings, state.nested_state, cache); + + auto [tuple_paths, tuple_types] = flattenTuple(state.nested_type); + auto flattened_tuple = flattenTuple(column_tuple); + const auto & tuple_columns = assert_cast(*flattened_tuple).getColumns(); + + assert(tuple_paths.size() == tuple_types.size()); + size_t num_subcolumns = tuple_paths.size(); + + if (tuple_columns.size() != num_subcolumns) + throw Exception(ErrorCodes::INCORRECT_DATA, + "Inconsistent type ({}) and column ({}) while reading column of type Object", + state.nested_type->getName(), column_tuple->getName()); + + for (size_t i = 0; i < num_subcolumns; ++i) + column_object.addSubcolumn(tuple_paths[i], tuple_columns[i]->assumeMutable()); +} + +template +void SerializationObjectDeprecated::serializeBinary(const Field &, WriteBuffer &, const FormatSettings &) const +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented for SerializationObjectDeprecated"); +} + +template +void SerializationObjectDeprecated::deserializeBinary(Field &, ReadBuffer &, const FormatSettings &) const +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented for SerializationObjectDeprecated"); +} + +template +void SerializationObjectDeprecated::serializeBinary(const IColumn &, size_t, WriteBuffer &, const FormatSettings &) const +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented for SerializationObjectDeprecated"); +} + +template +void SerializationObjectDeprecated::deserializeBinary(IColumn &, ReadBuffer &, const FormatSettings &) const +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented for SerializationObjectDeprecated"); +} + +/// TODO: use format different of JSON in serializations. + +template +void SerializationObjectDeprecated::serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + const auto & column_object = assert_cast(column); + const auto & subcolumns = column_object.getSubcolumns(); + + writeChar('{', ostr); + for (auto it = subcolumns.begin(); it != subcolumns.end(); ++it) + { + const auto & entry = *it; + if (it != subcolumns.begin()) + writeCString(",", ostr); + + writeDoubleQuoted(entry->path.getPath(), ostr); + writeChar(':', ostr); + serializeTextFromSubcolumn(entry->data, row_num, ostr, settings); + } + writeChar('}', ostr); +} + +template +template +void SerializationObjectDeprecated::serializeTextFromSubcolumn( + const ColumnObjectDeprecated::Subcolumn & subcolumn, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent) const +{ + const auto & least_common_type = subcolumn.getLeastCommonType(); + + if (subcolumn.isFinalized()) + { + const auto & finalized_column = subcolumn.getFinalizedColumn(); + auto info = least_common_type->getSerializationInfo(finalized_column); + auto serialization = least_common_type->getSerialization(*info); + if constexpr (pretty_json) + serialization->serializeTextJSONPretty(finalized_column, row_num, ostr, settings, indent); + else + serialization->serializeTextJSON(finalized_column, row_num, ostr, settings); + return; + } + + size_t ind = row_num; + if (ind < subcolumn.getNumberOfDefaultsInPrefix()) + { + /// Suboptimal, but it should happen rarely. + auto tmp_column = subcolumn.getLeastCommonType()->createColumn(); + tmp_column->insertDefault(); + + auto info = least_common_type->getSerializationInfo(*tmp_column); + auto serialization = least_common_type->getSerialization(*info); + if constexpr (pretty_json) + serialization->serializeTextJSONPretty(*tmp_column, 0, ostr, settings, indent); + else + serialization->serializeTextJSON(*tmp_column, 0, ostr, settings); + return; + } + + ind -= subcolumn.getNumberOfDefaultsInPrefix(); + for (const auto & part : subcolumn.getData()) + { + if (ind < part->size()) + { + auto part_type = getDataTypeByColumn(*part); + auto info = part_type->getSerializationInfo(*part); + auto serialization = part_type->getSerialization(*info); + if constexpr (pretty_json) + serialization->serializeTextJSONPretty(*part, ind, ostr, settings, indent); + else + serialization->serializeTextJSON(*part, ind, ostr, settings); + return; + } + + ind -= part->size(); + } + + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Index ({}) for text serialization is out of range", row_num); +} + +template +void SerializationObjectDeprecated::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + serializeTextImpl(column, row_num, ostr, settings); +} + +template +void SerializationObjectDeprecated::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + WriteBufferFromOwnString ostr_str; + serializeTextImpl(column, row_num, ostr_str, settings); + writeEscapedString(ostr_str.str(), ostr); +} + +template +void SerializationObjectDeprecated::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + WriteBufferFromOwnString ostr_str; + serializeTextImpl(column, row_num, ostr_str, settings); + writeQuotedString(ostr_str.str(), ostr); +} + +template +void SerializationObjectDeprecated::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + serializeTextImpl(column, row_num, ostr, settings); +} + +template +void SerializationObjectDeprecated::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + WriteBufferFromOwnString ostr_str; + serializeTextImpl(column, row_num, ostr_str, settings); + writeCSVString(ostr_str.str(), ostr); +} + +template +void SerializationObjectDeprecated::serializeTextMarkdown( + const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + if (settings.markdown.escape_special_characters) + { + WriteBufferFromOwnString ostr_str; + serializeTextImpl(column, row_num, ostr_str, settings); + writeMarkdownEscapedString(ostr_str.str(), ostr); + } + else + { + serializeTextEscaped(column, row_num, ostr, settings); + } +} + +template +void SerializationObjectDeprecated::serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent) const +{ + const auto & column_object = assert_cast(column); + const auto & subcolumns = column_object.getSubcolumns(); + + writeCString("{\n", ostr); + for (auto it = subcolumns.begin(); it != subcolumns.end(); ++it) + { + const auto & entry = *it; + if (it != subcolumns.begin()) + writeCString(",\n", ostr); + + writeChar(' ', (indent + 1) * 4, ostr); + writeDoubleQuoted(entry->path.getPath(), ostr); + writeCString(": ", ostr); + serializeTextFromSubcolumn(entry->data, row_num, ostr, settings, indent + 1); + } + writeChar('\n', ostr); + writeChar(' ', indent * 4, ostr); + writeChar('}', ostr); +} + + +SerializationPtr getObjectSerialization(const String & schema_format) +{ + if (schema_format == "json") + { +#if USE_SIMDJSON + return std::make_shared>>(); +#elif USE_RAPIDJSON + return std::make_shared>>(); +#else + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "To use data type Object with JSON format ClickHouse should be built with Simdjson or Rapidjson"); +#endif + } + + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unknown schema format '{}'", schema_format); +} + +} diff --git a/src/DataTypes/Serializations/SerializationObjectDeprecated.h b/src/DataTypes/Serializations/SerializationObjectDeprecated.h new file mode 100644 index 00000000000..c209f946850 --- /dev/null +++ b/src/DataTypes/Serializations/SerializationObjectDeprecated.h @@ -0,0 +1,121 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +/** Serialization for data type Object (deprecated). + * Supported only text serialization/deserialization. + * and binary bulk serialization/deserialization without position independent + * encoding, i.e. serialization/deserialization into Native format. + */ +template +class SerializationObjectDeprecated : public ISerialization +{ +public: + /** In Native format ColumnObjectDeprecated can be serialized + * in two formats: as Tuple or as String. + * The format is the following: + * + * 1 byte -- 0 if Tuple, 1 if String. + * [type_name] -- Only for tuple serialization. + * ... data of internal column ... + * + * ClickHouse client serializazes objects as tuples. + * String serialization exists for clients, which cannot + * do parsing by themselves and they can send raw data as + * string. It will be parsed on the server side. + */ + + void serializeBinaryBulkStatePrefix( + const IColumn & column, + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void serializeBinaryBulkStateSuffix( + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void deserializeBinaryBulkStatePrefix( + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; + + void serializeBinaryBulkWithMultipleStreams( + const IColumn & column, + size_t offset, + size_t limit, + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void deserializeBinaryBulkWithMultipleStreams( + ColumnPtr & column, + size_t limit, + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsCache * cache) const override; + + void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings &) const override; + void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + + void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent) const override; + void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void serializeTextMarkdown(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + + void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + +private: + enum class BinarySerializationKind : UInt8 + { + TUPLE = 0, + STRING = 1, + }; + + struct SerializeStateObject; + struct DeserializeStateObject; + + void deserializeBinaryBulkFromString( + ColumnObjectDeprecated & column_object, + size_t limit, + DeserializeBinaryBulkSettings & settings, + DeserializeStateObject & state, + SubstreamsCache * cache) const; + + void deserializeBinaryBulkFromTuple( + ColumnObjectDeprecated & column_object, + size_t limit, + DeserializeBinaryBulkSettings & settings, + DeserializeStateObject & state, + SubstreamsCache * cache) const; + + template + void checkSerializationIsSupported(const TSettings & settings) const; + + template + void deserializeTextImpl(IColumn & column, Reader && reader) const; + + void serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const; + + template + void serializeTextFromSubcolumn(const ColumnObjectDeprecated::Subcolumn & subcolumn, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent = 0) const; + + /// Pool of parser objects to make SerializationObjectDeprecated thread safe. + mutable SimpleObjectPool parsers_pool; +}; + +SerializationPtr getObjectSerialization(const String & schema_format); + +} diff --git a/src/DataTypes/Serializations/SerializationObjectDynamicPath.cpp b/src/DataTypes/Serializations/SerializationObjectDynamicPath.cpp new file mode 100644 index 00000000000..5323079c54b --- /dev/null +++ b/src/DataTypes/Serializations/SerializationObjectDynamicPath.cpp @@ -0,0 +1,192 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + +SerializationObjectDynamicPath::SerializationObjectDynamicPath( + const DB::SerializationPtr & nested_, const String & path_, const String & path_subcolumn_, size_t max_dynamic_types_) + : SerializationWrapper(nested_) + , path(path_) + , path_subcolumn(path_subcolumn_) + , dynamic_serialization(std::make_shared()) + , shared_data_serialization(SerializationObject::getTypeOfSharedData()->getDefaultSerialization()) + , max_dynamic_types(max_dynamic_types_) +{ +} + +struct DeserializeBinaryBulkStateObjectDynamicPath : public ISerialization::DeserializeBinaryBulkState +{ + ISerialization::DeserializeBinaryBulkStatePtr structure_state; + ISerialization::DeserializeBinaryBulkStatePtr nested_state; + bool read_from_shared_data; + ColumnPtr shared_data; +}; + +void SerializationObjectDynamicPath::enumerateStreams( + DB::ISerialization::EnumerateStreamsSettings & settings, + const DB::ISerialization::StreamCallback & callback, + const DB::ISerialization::SubstreamData & data) const +{ + settings.path.push_back(Substream::ObjectStructure); + callback(settings.path); + settings.path.pop_back(); + + const auto * deserialize_state = data.deserialize_state ? checkAndGetState(data.deserialize_state) : nullptr; + + /// We cannot enumerate anything if we don't have deserialization state, as we don't know the dynamic structure. + if (!deserialize_state) + return; + + settings.path.push_back(Substream::ObjectData); + const auto * structure_state = checkAndGetState(deserialize_state->structure_state); + /// Check if we have our path in dynamic paths. + if (structure_state->dynamic_paths.contains(path)) + { + settings.path.push_back(Substream::ObjectDynamicPath); + settings.path.back().object_path_name = path; + auto path_data = SubstreamData(nested_serialization) + .withType(data.type) + .withColumn(data.column) + .withSerializationInfo(data.serialization_info) + .withDeserializeState(deserialize_state->nested_state); + settings.path.back().data = path_data; + nested_serialization->enumerateStreams(settings, callback, path_data); + settings.path.pop_back(); + } + /// Otherwise we will have to read all shared data and try to find our path there. + else + { + settings.path.push_back(Substream::ObjectSharedData); + auto shared_data_substream_data = SubstreamData(shared_data_serialization) + .withType(data.type ? SerializationObject::getTypeOfSharedData() : nullptr) + .withColumn(data.column ? SerializationObject::getTypeOfSharedData()->createColumn() : nullptr) + .withSerializationInfo(data.serialization_info) + .withDeserializeState(deserialize_state->nested_state); + settings.path.back().data = shared_data_substream_data; + shared_data_serialization->enumerateStreams(settings, callback, shared_data_substream_data); + settings.path.pop_back(); + } + + settings.path.pop_back(); +} + +void SerializationObjectDynamicPath::serializeBinaryBulkStatePrefix(const IColumn &, SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const +{ + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStatePrefix is not implemented for SerializationObjectDynamicPath"); +} + +void SerializationObjectDynamicPath::serializeBinaryBulkStateSuffix(SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const +{ + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStateSuffix is not implemented for SerializationObjectDynamicPath"); +} + +void SerializationObjectDynamicPath::deserializeBinaryBulkStatePrefix( + DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state, SubstreamsDeserializeStatesCache * cache) const +{ + auto structure_state = SerializationObject::deserializeObjectStructureStatePrefix(settings, cache); + if (!structure_state) + return; + + auto dynamic_path_state = std::make_shared(); + dynamic_path_state->structure_state = std::move(structure_state); + /// Remember if we need to read from shared data or we have this path in dynamic paths. + dynamic_path_state->read_from_shared_data = !checkAndGetState(dynamic_path_state->structure_state)->dynamic_paths.contains(path); + settings.path.push_back(Substream::ObjectData); + if (dynamic_path_state->read_from_shared_data) + { + settings.path.push_back(Substream::ObjectSharedData); + shared_data_serialization->deserializeBinaryBulkStatePrefix(settings, dynamic_path_state->nested_state, cache); + settings.path.pop_back(); + } + else + { + settings.path.push_back(Substream::ObjectDynamicPath); + settings.path.back().object_path_name = path; + nested_serialization->deserializeBinaryBulkStatePrefix(settings, dynamic_path_state->nested_state, cache); + settings.path.pop_back(); + } + + settings.path.pop_back(); + state = std::move(dynamic_path_state); +} + +void SerializationObjectDynamicPath::serializeBinaryBulkWithMultipleStreams(const IColumn &, size_t, size_t, SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkWithMultipleStreams is not implemented for SerializationObjectDynamicPath"); +} + +void SerializationObjectDynamicPath::deserializeBinaryBulkWithMultipleStreams( + ColumnPtr & result_column, + size_t limit, + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsCache * cache) const +{ + if (!state) + return; + + auto * dynamic_path_state = checkAndGetState(state); + settings.path.push_back(Substream::ObjectData); + /// Check if we don't need to read shared data. In this case just read data from dynamic path. + if (!dynamic_path_state->read_from_shared_data) + { + settings.path.push_back(Substream::ObjectDynamicPath); + settings.path.back().object_path_name = path; + nested_serialization->deserializeBinaryBulkWithMultipleStreams(result_column, limit, settings, dynamic_path_state->nested_state, cache); + settings.path.pop_back(); + } + /// Otherwise, read the whole shared data column and extract requested path from it. + /// TODO: We can read several subcolumns of the same path located in the shared data + /// and right now we extract the whole path column from shared data every time + /// and then extract the requested subcolumns. We can optimize it and use substreams + /// cache here to avoid extracting the same path from shared data several times. + /// + /// TODO: We can change the serialization of shared data to optimize reading paths from it. + /// Right now we cannot know if shared data contains our path in current range or not, + /// but we can change the serialization and write the list of all paths stored in shared + /// data before each granule, and then replace the column that stores paths with column + /// with indexes in this list. It can also reduce the storage, because we will store + /// each path only once and can replace UInt64 string offset column with indexes column + /// that can have smaller type depending on the number of paths in the list. + else + { + settings.path.push_back(Substream::ObjectSharedData); + /// Initialize shared_data column if needed. + if (result_column->empty()) + dynamic_path_state->shared_data = SerializationObject::getTypeOfSharedData()->createColumn(); + size_t prev_size = result_column->size(); + shared_data_serialization->deserializeBinaryBulkWithMultipleStreams(dynamic_path_state->shared_data, limit, settings, dynamic_path_state->nested_state, cache); + /// If we need to read a subcolumn from Dynamic column, create an empty Dynamic column, fill it and extract subcolumn. + MutableColumnPtr dynamic_column = path_subcolumn.empty() ? result_column->assumeMutable() : ColumnDynamic::create(max_dynamic_types)->getPtr(); + /// Check if we don't have any paths in shared data in current range. + const auto & offsets = assert_cast(*dynamic_path_state->shared_data).getOffsets(); + if (offsets.back() == offsets[ssize_t(prev_size) - 1]) + dynamic_column->insertManyDefaults(limit); + else + ColumnObject::fillPathColumnFromSharedData(*dynamic_column, path, dynamic_path_state->shared_data, prev_size, dynamic_path_state->shared_data->size()); + + /// Extract subcolumn from Dynamic column if needed. + if (!path_subcolumn.empty()) + { + auto subcolumn = std::make_shared(max_dynamic_types)->getSubcolumn(path_subcolumn, dynamic_column->getPtr()); + result_column->assumeMutable()->insertRangeFrom(*subcolumn, 0, subcolumn->size()); + } + + settings.path.pop_back(); + } + + settings.path.pop_back(); +} + +} diff --git a/src/DataTypes/Serializations/SerializationObjectDynamicPath.h b/src/DataTypes/Serializations/SerializationObjectDynamicPath.h new file mode 100644 index 00000000000..e11d0cded73 --- /dev/null +++ b/src/DataTypes/Serializations/SerializationObjectDynamicPath.h @@ -0,0 +1,58 @@ +#pragma once + +#include + +namespace DB +{ + +/// Serialization of dynamic Object paths. +/// For example, if we have type JSON(a.b UInt32, b.c String) and data {"a" : {"b" : 42}, "b" : {"c" : "Hello}, "c" : {"d" : [1, 2, 3]}, "d" : 42} +/// this class will be responsible for reading dynamic paths 'c.d' and 'd' as subcolumns. +/// Typed paths 'a.b' and 'b.c' are serialized in SerializationObjectTypedPath. +class SerializationObjectDynamicPath final : public SerializationWrapper +{ +public: + SerializationObjectDynamicPath(const SerializationPtr & nested_, const String & path_, const String & path_subcolumn_, size_t max_dynamic_types_); + + void enumerateStreams( + EnumerateStreamsSettings & settings, + const StreamCallback & callback, + const SubstreamData & data) const override; + + void serializeBinaryBulkStatePrefix( + const IColumn & column, + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void serializeBinaryBulkStateSuffix( + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void deserializeBinaryBulkStatePrefix( + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; + + void serializeBinaryBulkWithMultipleStreams( + const IColumn & column, + size_t offset, + size_t limit, + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void deserializeBinaryBulkWithMultipleStreams( + ColumnPtr & column, + size_t limit, + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsCache * cache) const override; + +private: + String path; + String path_subcolumn; + SerializationPtr dynamic_serialization; + SerializationPtr shared_data_serialization; + size_t max_dynamic_types; +}; + +} diff --git a/src/DataTypes/Serializations/SerializationObjectTypedPath.cpp b/src/DataTypes/Serializations/SerializationObjectTypedPath.cpp new file mode 100644 index 00000000000..ef086d486f7 --- /dev/null +++ b/src/DataTypes/Serializations/SerializationObjectTypedPath.cpp @@ -0,0 +1,78 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + + +void SerializationObjectTypedPath::enumerateStreams( + DB::ISerialization::EnumerateStreamsSettings & settings, + const DB::ISerialization::StreamCallback & callback, + const DB::ISerialization::SubstreamData & data) const +{ + settings.path.push_back(Substream::ObjectData); + settings.path.push_back(Substream::ObjectTypedPath); + settings.path.back().object_path_name = path; + auto path_data = SubstreamData(nested_serialization) + .withType(data.type) + .withColumn(data.column) + .withSerializationInfo(data.serialization_info) + .withDeserializeState(data.deserialize_state); + nested_serialization->enumerateStreams(settings, callback, path_data); + settings.path.pop_back(); + settings.path.pop_back(); +} + +void SerializationObjectTypedPath::serializeBinaryBulkStatePrefix(const IColumn &, SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const +{ + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStatePrefix is not implemented for SerializationObjectTypedPath"); +} + +void SerializationObjectTypedPath::serializeBinaryBulkStateSuffix(SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const +{ + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStateSuffix is not implemented for SerializationObjectTypedPath"); +} + +void SerializationObjectTypedPath::deserializeBinaryBulkStatePrefix( + DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state, SubstreamsDeserializeStatesCache * cache) const +{ + settings.path.push_back(Substream::ObjectData); + settings.path.push_back(Substream::ObjectTypedPath); + settings.path.back().object_path_name = path; + nested_serialization->deserializeBinaryBulkStatePrefix(settings, state, cache); + settings.path.pop_back(); + settings.path.pop_back(); +} + +void SerializationObjectTypedPath::serializeBinaryBulkWithMultipleStreams(const IColumn &, size_t, size_t, SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkWithMultipleStreams is not implemented for SerializationObjectTypedPath"); +} + +void SerializationObjectTypedPath::deserializeBinaryBulkWithMultipleStreams( + ColumnPtr & result_column, + size_t limit, + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsCache * cache) const +{ + settings.path.push_back(Substream::ObjectData); + settings.path.push_back(Substream::ObjectTypedPath); + settings.path.back().object_path_name = path; + nested_serialization->deserializeBinaryBulkWithMultipleStreams(result_column, limit, settings, state, cache); + settings.path.pop_back(); + settings.path.pop_back(); +} + +} diff --git a/src/DataTypes/Serializations/SerializationObjectTypedPath.h b/src/DataTypes/Serializations/SerializationObjectTypedPath.h new file mode 100644 index 00000000000..997e14bd145 --- /dev/null +++ b/src/DataTypes/Serializations/SerializationObjectTypedPath.h @@ -0,0 +1,57 @@ +#pragma once + +#include + +namespace DB +{ + +/// Serialization of typed Object paths. +/// For example, for type JSON(a.b UInt32, b.c String) this serialization +/// will be used to read paths 'a.b' and 'b.c' as subcolumns. +class SerializationObjectTypedPath final : public SerializationWrapper +{ +public: + SerializationObjectTypedPath(const SerializationPtr & nested_, const String & path_) + : SerializationWrapper(nested_) + , path(path_) + { + } + + void enumerateStreams( + EnumerateStreamsSettings & settings, + const StreamCallback & callback, + const SubstreamData & data) const override; + + void serializeBinaryBulkStatePrefix( + const IColumn & column, + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void serializeBinaryBulkStateSuffix( + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void deserializeBinaryBulkStatePrefix( + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; + + void serializeBinaryBulkWithMultipleStreams( + const IColumn & column, + size_t offset, + size_t limit, + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void deserializeBinaryBulkWithMultipleStreams( + ColumnPtr & column, + size_t limit, + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsCache * cache) const override; + +private: + String path; +}; + +} diff --git a/src/DataTypes/Serializations/SerializationString.cpp b/src/DataTypes/Serializations/SerializationString.cpp index 9e523d0d745..ac5d4e3e128 100644 --- a/src/DataTypes/Serializations/SerializationString.cpp +++ b/src/DataTypes/Serializations/SerializationString.cpp @@ -32,7 +32,7 @@ namespace ErrorCodes void SerializationString::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const { - const String & s = field.get(); + const String & s = field.safeGet(); if (settings.binary.max_binary_string_size && s.size() > settings.binary.max_binary_string_size) throw Exception( ErrorCodes::TOO_LARGE_STRING_SIZE, @@ -59,7 +59,7 @@ void SerializationString::deserializeBinary(Field & field, ReadBuffer & istr, co settings.binary.max_binary_string_size); field = String(); - String & s = field.get(); + String & s = field.safeGet(); s.resize(size); istr.readStrict(s.data(), size); } diff --git a/src/DataTypes/Serializations/SerializationSubObject.cpp b/src/DataTypes/Serializations/SerializationSubObject.cpp new file mode 100644 index 00000000000..9084d46f9b2 --- /dev/null +++ b/src/DataTypes/Serializations/SerializationSubObject.cpp @@ -0,0 +1,259 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + +SerializationSubObject::SerializationSubObject( + const String & path_prefix_, const std::unordered_map & typed_paths_serializations_) + : path_prefix(path_prefix_) + , typed_paths_serializations(typed_paths_serializations_) + , dynamic_serialization(std::make_shared()) + , shared_data_serialization(SerializationObject::getTypeOfSharedData()->getDefaultSerialization()) +{ +} + +struct DeserializeBinaryBulkStateSubObject : public ISerialization::DeserializeBinaryBulkState +{ + std::unordered_map typed_path_states; + std::unordered_map dynamic_path_states; + std::vector dynamic_paths; + std::vector dynamic_sub_paths; + ISerialization::DeserializeBinaryBulkStatePtr shared_data_state; + ColumnPtr shared_data; +}; + +void SerializationSubObject::enumerateStreams( + DB::ISerialization::EnumerateStreamsSettings & settings, + const DB::ISerialization::StreamCallback & callback, + const DB::ISerialization::SubstreamData & data) const +{ + settings.path.push_back(Substream::ObjectStructure); + callback(settings.path); + settings.path.pop_back(); + + const auto * column_object = data.column ? &assert_cast(*data.column) : nullptr; + const auto * type_object = data.type ? &assert_cast(*data.type) : nullptr; + const auto * deserialize_state = data.deserialize_state ? checkAndGetState(data.deserialize_state) : nullptr; + + settings.path.push_back(Substream::ObjectData); + + /// typed_paths_serializations contains only typed paths with requested prefix from original Object column. + for (const auto & [path, serialization] : typed_paths_serializations) + { + settings.path.push_back(Substream::ObjectTypedPath); + settings.path.back().object_path_name = path; + auto path_data = SubstreamData(serialization) + .withType(type_object ? type_object->getTypedPaths().at(path.substr(path_prefix.size() + 1)) : nullptr) + .withColumn(column_object ? column_object->getTypedPaths().at(path.substr(path_prefix.size() + 1)) : nullptr) + .withSerializationInfo(data.serialization_info) + .withDeserializeState(deserialize_state ? deserialize_state->typed_path_states.at(path) : nullptr); + settings.path.back().data = path_data; + serialization->enumerateStreams(settings, callback, path_data); + settings.path.pop_back(); + } + + /// We will need to read shared data to find all paths with requested prefix. + settings.path.push_back(Substream::ObjectSharedData); + auto shared_data_substream_data = SubstreamData(shared_data_serialization) + .withType(data.type ? SerializationObject::getTypeOfSharedData() : nullptr) + .withColumn(data.column ? SerializationObject::getTypeOfSharedData()->createColumn() : nullptr) + .withSerializationInfo(data.serialization_info) + .withDeserializeState(deserialize_state ? deserialize_state->shared_data_state : nullptr); + settings.path.back().data = shared_data_substream_data; + shared_data_serialization->enumerateStreams(settings, callback, shared_data_substream_data); + settings.path.pop_back(); + + /// If deserialize state is provided, enumerate streams for dynamic paths. + if (deserialize_state) + { + DataTypePtr type = std::make_shared(); + for (const auto & [path, state] : deserialize_state->dynamic_path_states) + { + settings.path.push_back(Substream::ObjectDynamicPath); + settings.path.back().object_path_name = path; + auto path_data = SubstreamData(dynamic_serialization) + .withType(type_object ? type : nullptr) + .withColumn(nullptr) + .withSerializationInfo(data.serialization_info) + .withDeserializeState(state); + settings.path.back().data = path_data; + dynamic_serialization->enumerateStreams(settings, callback, path_data); + settings.path.pop_back(); + } + } + + settings.path.pop_back(); +} + +void SerializationSubObject::serializeBinaryBulkStatePrefix(const IColumn &, SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const +{ + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStatePrefix is not implemented for SerializationSubObject"); +} + +void SerializationSubObject::serializeBinaryBulkStateSuffix(SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const +{ + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStateSuffix is not implemented for SerializationSubObject"); +} + +namespace +{ + +/// Return sub-path by specified prefix. +/// For example, for prefix a.b: +/// a.b.c.d -> c.d, a.b.c -> c +String getSubPath(const String & path, const String & prefix) +{ + return path.substr(prefix.size() + 1); +} + +std::string_view getSubPath(const std::string_view & path, const String & prefix) +{ + return path.substr(prefix.size() + 1); +} + +} + +void SerializationSubObject::deserializeBinaryBulkStatePrefix( + DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state, SubstreamsDeserializeStatesCache * cache) const +{ + auto structure_state = SerializationObject::deserializeObjectStructureStatePrefix(settings, cache); + if (!structure_state) + return; + + auto sub_object_state = std::make_shared(); + settings.path.push_back(Substream::ObjectData); + for (const auto & [path, serialization] : typed_paths_serializations) + { + settings.path.push_back(Substream::ObjectTypedPath); + settings.path.back().object_path_name = path; + serialization->deserializeBinaryBulkStatePrefix(settings, sub_object_state->typed_path_states[path], cache); + settings.path.pop_back(); + } + + for (const auto & dynamic_path : checkAndGetState(structure_state)->sorted_dynamic_paths) + { + /// Save only dynamic paths with requested prefix. + if (dynamic_path.starts_with(path_prefix) && dynamic_path.size() != path_prefix.size()) + { + settings.path.push_back(Substream::ObjectDynamicPath); + settings.path.back().object_path_name = dynamic_path; + dynamic_serialization->deserializeBinaryBulkStatePrefix(settings, sub_object_state->dynamic_path_states[dynamic_path], cache); + settings.path.pop_back(); + sub_object_state->dynamic_paths.push_back(dynamic_path); + sub_object_state->dynamic_sub_paths.push_back(getSubPath(dynamic_path, path_prefix)); + } + } + + settings.path.push_back(Substream::ObjectSharedData); + shared_data_serialization->deserializeBinaryBulkStatePrefix(settings, sub_object_state->shared_data_state, cache); + settings.path.pop_back(); + + settings.path.pop_back(); + state = std::move(sub_object_state); +} + +void SerializationSubObject::serializeBinaryBulkWithMultipleStreams(const IColumn &, size_t, size_t, SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkWithMultipleStreams is not implemented for SerializationSubObject"); +} + +void SerializationSubObject::deserializeBinaryBulkWithMultipleStreams( + ColumnPtr & result_column, + size_t limit, + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsCache * cache) const +{ + if (!state) + return; + + auto * sub_object_state = checkAndGetState(state); + auto mutable_column = result_column->assumeMutable(); + auto & column_object = assert_cast(*mutable_column); + /// If it's a new object column, set dynamic paths and statistics. + if (column_object.empty()) + column_object.setDynamicPaths(sub_object_state->dynamic_sub_paths); + + auto & typed_paths = column_object.getTypedPaths(); + auto & dynamic_paths = column_object.getDynamicPaths(); + + settings.path.push_back(Substream::ObjectData); + for (const auto & [path, serialization] : typed_paths_serializations) + { + settings.path.push_back(Substream::ObjectTypedPath); + settings.path.back().object_path_name = path; + serialization->deserializeBinaryBulkWithMultipleStreams(typed_paths[getSubPath(path, path_prefix)], limit, settings, sub_object_state->typed_path_states[path], cache); + settings.path.pop_back(); + } + + for (const auto & path : sub_object_state->dynamic_paths) + { + settings.path.push_back(Substream::ObjectDynamicPath); + settings.path.back().object_path_name = path; + dynamic_serialization->deserializeBinaryBulkWithMultipleStreams(dynamic_paths[getSubPath(path, path_prefix)], limit, settings, sub_object_state->dynamic_path_states[path], cache); + settings.path.pop_back(); + } + + settings.path.push_back(Substream::ObjectSharedData); + /// If it's a new object column, reinitialize column for shared data. + if (result_column->empty()) + sub_object_state->shared_data = SerializationObject::getTypeOfSharedData()->createColumn(); + size_t prev_size = column_object.size(); + shared_data_serialization->deserializeBinaryBulkWithMultipleStreams(sub_object_state->shared_data, limit, settings, sub_object_state->shared_data_state, cache); + settings.path.pop_back(); + + auto & sub_object_shared_data = column_object.getSharedDataColumn(); + const auto & offsets = assert_cast(*sub_object_state->shared_data).getOffsets(); + /// Check if there is no data in shared data in current range. + if (offsets.back() == offsets[ssize_t(prev_size) - 1]) + { + sub_object_shared_data.insertManyDefaults(limit); + } + else + { + const auto & shared_data_array = assert_cast(*sub_object_state->shared_data); + const auto & shared_data_offsets = shared_data_array.getOffsets(); + const auto & shared_data_tuple = assert_cast(shared_data_array.getData()); + const auto & shared_data_paths = assert_cast(shared_data_tuple.getColumn(0)); + const auto & shared_data_values = assert_cast(shared_data_tuple.getColumn(1)); + + auto & sub_object_data_offsets = column_object.getSharedDataOffsets(); + auto [sub_object_shared_data_paths, sub_object_shared_data_values] = column_object.getSharedDataPathsAndValues(); + StringRef prefix_ref(path_prefix); + for (size_t i = prev_size; i != shared_data_offsets.size(); ++i) + { + size_t start = shared_data_offsets[ssize_t(i) - 1]; + size_t end = shared_data_offsets[ssize_t(i)]; + size_t lower_bound_index = ColumnObject::findPathLowerBoundInSharedData(prefix_ref, shared_data_paths, start, end); + for (; lower_bound_index != end; ++lower_bound_index) + { + auto path = shared_data_paths.getDataAt(lower_bound_index).toView(); + if (!path.starts_with(path_prefix)) + break; + + /// Don't include path that is equal to the prefix. + if (path.size() != path_prefix.size()) + { + auto sub_path = getSubPath(path, path_prefix); + sub_object_shared_data_paths->insertData(sub_path.data(), sub_path.size()); + sub_object_shared_data_values->insertFrom(shared_data_values, lower_bound_index); + } + } + sub_object_data_offsets.push_back(sub_object_shared_data_paths->size()); + } + } + settings.path.pop_back(); +} + +} diff --git a/src/DataTypes/Serializations/SerializationSubObject.h b/src/DataTypes/Serializations/SerializationSubObject.h new file mode 100644 index 00000000000..10973b48957 --- /dev/null +++ b/src/DataTypes/Serializations/SerializationSubObject.h @@ -0,0 +1,76 @@ +#pragma once + +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + +/// Serialization of a sub-object Object subcolumns. +/// For example, if we have type JSON and data {"a" : {"b" : {"c" : 42, "d" : "Hello"}}, "c" : [1, 2, 3], "d" : 42} +/// this class will be responsible for reading sub-object a.b and will read JSON column with data {"c" : 43, "d" : "Hello"}. +class SerializationSubObject final : public SimpleTextSerialization +{ +public: + SerializationSubObject(const String & path_prefix_, const std::unordered_map & typed_paths_serializations_); + + void enumerateStreams( + EnumerateStreamsSettings & settings, + const StreamCallback & callback, + const SubstreamData & data) const override; + + void serializeBinaryBulkStatePrefix( + const IColumn & column, + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void serializeBinaryBulkStateSuffix( + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void deserializeBinaryBulkStatePrefix( + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; + + void serializeBinaryBulkWithMultipleStreams( + const IColumn & column, + size_t offset, + size_t limit, + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void deserializeBinaryBulkWithMultipleStreams( + ColumnPtr & column, + size_t limit, + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsCache * cache) const override; + + void serializeBinary(const Field &, WriteBuffer &, const FormatSettings &) const override { throwNoSerialization(); } + void deserializeBinary(Field &, ReadBuffer &, const FormatSettings &) const override { throwNoSerialization(); } + void serializeBinary(const IColumn &, size_t, WriteBuffer &, const FormatSettings &) const override { throwNoSerialization(); } + void deserializeBinary(IColumn &, ReadBuffer &, const FormatSettings &) const override { throwNoSerialization(); } + void serializeText(const IColumn &, size_t, WriteBuffer &, const FormatSettings &) const override { throwNoSerialization(); } + void deserializeText(IColumn &, ReadBuffer &, const FormatSettings &, bool) const override { throwNoSerialization(); } + bool tryDeserializeText(IColumn &, ReadBuffer &, const FormatSettings &, bool) const override { throwNoSerialization(); } + +private: + [[noreturn]] static void throwNoSerialization() + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Text/binary serialization is not implemented for object sub-object subcolumn"); + } + + String path_prefix; + std::unordered_map typed_paths_serializations; + SerializationPtr dynamic_serialization; + SerializationPtr shared_data_serialization; +}; + +} diff --git a/src/DataTypes/Serializations/SerializationTuple.cpp b/src/DataTypes/Serializations/SerializationTuple.cpp index 7a5227ca752..594a23ab507 100644 --- a/src/DataTypes/Serializations/SerializationTuple.cpp +++ b/src/DataTypes/Serializations/SerializationTuple.cpp @@ -34,7 +34,7 @@ static inline const IColumn & extractElementColumn(const IColumn & column, size_ void SerializationTuple::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const { - const auto & tuple = field.get(); + const auto & tuple = field.safeGet(); for (size_t element_index = 0; element_index < elems.size(); ++element_index) { const auto & serialization = elems[element_index]; @@ -47,7 +47,7 @@ void SerializationTuple::deserializeBinary(Field & field, ReadBuffer & istr, con const size_t size = elems.size(); field = Tuple(); - Tuple & tuple = field.get(); + Tuple & tuple = field.safeGet(); tuple.reserve(size); for (size_t i = 0; i < size; ++i) elems[i]->deserializeBinary(tuple.emplace_back(), istr, settings); diff --git a/src/DataTypes/Serializations/SerializationUUID.cpp b/src/DataTypes/Serializations/SerializationUUID.cpp index 58178a896dc..f18466ad8ad 100644 --- a/src/DataTypes/Serializations/SerializationUUID.cpp +++ b/src/DataTypes/Serializations/SerializationUUID.cpp @@ -137,7 +137,7 @@ bool SerializationUUID::tryDeserializeTextCSV(IColumn & column, ReadBuffer & ist void SerializationUUID::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const { - UUID x = field.get(); + UUID x = field.safeGet(); writeBinaryLittleEndian(x, ostr); } diff --git a/src/DataTypes/Serializations/SerializationVariant.cpp b/src/DataTypes/Serializations/SerializationVariant.cpp index e4d71e84cc7..0f6a17ef167 100644 --- a/src/DataTypes/Serializations/SerializationVariant.cpp +++ b/src/DataTypes/Serializations/SerializationVariant.cpp @@ -218,7 +218,8 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian size_t limit, SerializeBinaryBulkSettings & settings, SerializeBinaryBulkStatePtr & state, - std::unordered_map & variants_statistics) const + std::unordered_map & variants_statistics, + size_t & total_size_of_variants) const { const ColumnVariant & col = assert_cast(column); if (const size_t size = col.size(); limit == 0 || offset + limit > size) @@ -265,6 +266,7 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian /// We can use the same offset/limit as for whole Variant column variants[non_empty_global_discr]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(non_empty_global_discr), offset, limit, settings, variant_state->variant_states[non_empty_global_discr]); variants_statistics[variant_names[non_empty_global_discr]] += limit; + total_size_of_variants += limit; settings.path.pop_back(); settings.path.pop_back(); return; @@ -315,7 +317,9 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian { addVariantElementToPath(settings.path, i); variants[i]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(i), 0, 0, settings, variant_state->variant_states[i]); - variants_statistics[variant_names[i]] += col.getVariantByGlobalDiscriminator(i).size(); + size_t variant_size = col.getVariantByGlobalDiscriminator(i).size(); + variants_statistics[variant_names[i]] += variant_size; + total_size_of_variants += variant_size; settings.path.pop_back(); } settings.path.pop_back(); @@ -386,6 +390,7 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian settings, variant_state->variant_states[i]); variants_statistics[variant_names[i]] += variant_offsets_and_limits[i].second; + total_size_of_variants += variant_offsets_and_limits[i].second; settings.path.pop_back(); } } @@ -400,7 +405,8 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreams( DB::ISerialization::SerializeBinaryBulkStatePtr & state) const { std::unordered_map tmp_statistics; - serializeBinaryBulkWithMultipleStreamsAndUpdateVariantStatistics(column, offset, limit, settings, state, tmp_statistics); + size_t tmp_size; + serializeBinaryBulkWithMultipleStreamsAndUpdateVariantStatistics(column, offset, limit, settings, state, tmp_statistics, tmp_size); } void SerializationVariant::deserializeBinaryBulkWithMultipleStreams( @@ -1068,6 +1074,16 @@ void SerializationVariant::serializeTextJSON(const IColumn & column, size_t row_ variants[global_discr]->serializeTextJSON(col.getVariantByGlobalDiscriminator(global_discr), col.offsetAt(row_num), ostr, settings); } +void SerializationVariant::serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent) const +{ + const ColumnVariant & col = assert_cast(column); + auto global_discr = col.globalDiscriminatorAt(row_num); + if (global_discr == ColumnVariant::NULL_DISCRIMINATOR) + SerializationNullable::serializeNullJSON(ostr); + else + variants[global_discr]->serializeTextJSONPretty(col.getVariantByGlobalDiscriminator(global_discr), col.offsetAt(row_num), ostr, settings, indent); +} + bool SerializationVariant::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { String field; diff --git a/src/DataTypes/Serializations/SerializationVariant.h b/src/DataTypes/Serializations/SerializationVariant.h index af89632cf81..a76a211e897 100644 --- a/src/DataTypes/Serializations/SerializationVariant.h +++ b/src/DataTypes/Serializations/SerializationVariant.h @@ -113,7 +113,8 @@ public: size_t limit, SerializeBinaryBulkSettings & settings, SerializeBinaryBulkStatePtr & state, - std::unordered_map & variants_statistics) const; + std::unordered_map & variants_statistics, + size_t & total_size_of_variants) const; void deserializeBinaryBulkWithMultipleStreams( ColumnPtr & column, @@ -145,6 +146,7 @@ public: bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent) const override; void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; diff --git a/src/DataTypes/Serializations/SerializationVariantElement.cpp b/src/DataTypes/Serializations/SerializationVariantElement.cpp index 03b5d9584e0..9ad183a159e 100644 --- a/src/DataTypes/Serializations/SerializationVariantElement.cpp +++ b/src/DataTypes/Serializations/SerializationVariantElement.cpp @@ -193,16 +193,6 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams( nested_serialization->deserializeBinaryBulkWithMultipleStreams(variant_element_state->variant, *variant_limit, settings, variant_element_state->variant_element_state, cache); removeVariantFromPath(settings.path); - /// If nothing was deserialized when variant_limit > 0 - /// it means that we don't have a stream for such sub-column. - /// It may happen during ALTER MODIFY column with Variant extension. - /// In this case we should just insert default values. - if (variant_element_state->variant->empty()) - { - mutable_column->insertManyDefaults(num_new_discriminators); - return; - } - /// If there was nothing to deserialize or nothing was actually deserialized when variant_limit > 0, just insert defaults. /// The second case means that we don't have a stream for such sub-column. It may happen during ALTER MODIFY column with Variant extension. if (variant_limit == 0 || variant_element_state->variant->empty()) @@ -305,8 +295,10 @@ SerializationVariantElement::VariantSubcolumnCreator::VariantSubcolumnCreator( const String & variant_element_name_, ColumnVariant::Discriminator global_variant_discriminator_, ColumnVariant::Discriminator local_variant_discriminator_, - bool make_nullable_) + bool make_nullable_, + const ColumnPtr & null_map_) : local_discriminators(local_discriminators_) + , null_map(null_map_) , variant_element_name(variant_element_name_) , global_variant_discriminator(global_variant_discriminator_) , local_variant_discriminator(local_variant_discriminator_) @@ -314,12 +306,13 @@ SerializationVariantElement::VariantSubcolumnCreator::VariantSubcolumnCreator( { } -DataTypePtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB::DataTypePtr & prev) const + +DataTypePtr SerializationVariantElement::VariantSubcolumnCreator::create(const DataTypePtr & prev) const { return make_nullable ? makeNullableOrLowCardinalityNullableSafe(prev) : prev; } -SerializationPtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB::SerializationPtr & prev) const +SerializationPtr SerializationVariantElement::VariantSubcolumnCreator::create(const SerializationPtr & prev) const { return std::make_shared(prev, variant_element_name, global_variant_discriminator); } @@ -339,12 +332,16 @@ ColumnPtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB: return res; } - /// In general case we should iterate through discriminators and create null-map for our variant. - NullMap null_map; - null_map.reserve(local_discriminators->size()); - const auto & local_discriminators_data = assert_cast(*local_discriminators).getData(); - for (auto local_discr : local_discriminators_data) - null_map.push_back(local_discr != local_variant_discriminator); + /// In general case we should iterate through discriminators and create null-map for our variant if we don't already have it. + std::optional null_map_from_discriminators; + if (!null_map) + { + null_map_from_discriminators = NullMap(); + null_map_from_discriminators->reserve(local_discriminators->size()); + const auto & local_discriminators_data = assert_cast(*local_discriminators).getData(); + for (auto local_discr : local_discriminators_data) + null_map_from_discriminators->push_back(local_discr != local_variant_discriminator); + } /// Now we can create new column from null-map and variant column using IColumn::expand. auto res_column = IColumn::mutate(prev); @@ -356,13 +353,21 @@ ColumnPtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB: if (make_nullable && prev->lowCardinality()) res_column = assert_cast(*res_column).cloneNullable(); - res_column->expand(null_map, /*inverted = */ true); + if (null_map_from_discriminators) + res_column->expand(*null_map_from_discriminators, /*inverted = */ true); + else + res_column->expand(assert_cast(*null_map).getData(), /*inverted = */ true); if (make_nullable && prev->canBeInsideNullable()) { - auto null_map_col = ColumnUInt8::create(); - null_map_col->getData() = std::move(null_map); - return ColumnNullable::create(std::move(res_column), std::move(null_map_col)); + if (null_map_from_discriminators) + { + auto null_map_col = ColumnUInt8::create(); + null_map_col->getData() = std::move(*null_map_from_discriminators); + return ColumnNullable::create(std::move(res_column), std::move(null_map_col)); + } + + return ColumnNullable::create(std::move(res_column), null_map->assumeMutable()); } return res_column; diff --git a/src/DataTypes/Serializations/SerializationVariantElement.h b/src/DataTypes/Serializations/SerializationVariantElement.h index 69101aea0f5..64f86eb2190 100644 --- a/src/DataTypes/Serializations/SerializationVariantElement.h +++ b/src/DataTypes/Serializations/SerializationVariantElement.h @@ -63,18 +63,22 @@ public: struct VariantSubcolumnCreator : public ISubcolumnCreator { + private: const ColumnPtr local_discriminators; + const ColumnPtr null_map; /// optional const String variant_element_name; const ColumnVariant::Discriminator global_variant_discriminator; const ColumnVariant::Discriminator local_variant_discriminator; bool make_nullable; + public: VariantSubcolumnCreator( const ColumnPtr & local_discriminators_, const String & variant_element_name_, ColumnVariant::Discriminator global_variant_discriminator_, ColumnVariant::Discriminator local_variant_discriminator_, - bool make_nullable_); + bool make_nullable_, + const ColumnPtr & null_map_ = nullptr); DataTypePtr create(const DataTypePtr & prev) const override; ColumnPtr create(const ColumnPtr & prev) const override; diff --git a/src/DataTypes/Serializations/tests/gtest_deprecated_object_serialization.cpp b/src/DataTypes/Serializations/tests/gtest_deprecated_object_serialization.cpp new file mode 100644 index 00000000000..ec53df18297 --- /dev/null +++ b/src/DataTypes/Serializations/tests/gtest_deprecated_object_serialization.cpp @@ -0,0 +1,80 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#if USE_SIMDJSON + +using namespace DB; + +TEST(SerializationObjectDeprecated, FromString) +{ + WriteBufferFromOwnString out; + + auto column_string = ColumnString::create(); + column_string->insert(R"({"k1" : 1, "k2" : [{"k3" : "aa", "k4" : 2}, {"k3": "bb", "k4": 3}]})"); + column_string->insert(R"({"k1" : 2, "k2" : [{"k3" : "cc", "k5" : 4}, {"k4": 5}, {"k4": 6}]})"); + + { + auto serialization = std::make_shared(); + + ISerialization::SerializeBinaryBulkSettings settings; + ISerialization::SerializeBinaryBulkStatePtr state; + settings.position_independent_encoding = false; + settings.getter = [&out](const auto &) { return &out; }; + + writeIntBinary(static_cast(1), out); + serialization->serializeBinaryBulkStatePrefix(*column_string, settings, state); + serialization->serializeBinaryBulkWithMultipleStreams(*column_string, 0, column_string->size(), settings, state); + serialization->serializeBinaryBulkStateSuffix(settings, state); + } + + auto type_object = std::make_shared("json", false); + ColumnPtr result_column = type_object->createColumn(); + + ReadBufferFromOwnString in(out.str()); + + { + auto serialization = type_object->getDefaultSerialization(); + + ISerialization::DeserializeBinaryBulkSettings settings; + ISerialization::DeserializeBinaryBulkStatePtr state; + settings.position_independent_encoding = false; + settings.getter = [&in](const auto &) { return ∈ }; + + serialization->deserializeBinaryBulkStatePrefix(settings, state, nullptr); + serialization->deserializeBinaryBulkWithMultipleStreams(result_column, column_string->size(), settings, state, nullptr); + } + + auto & column_object = assert_cast(*result_column->assumeMutable()); + column_object.finalize(); + + ASSERT_TRUE(column_object.size() == 2); + ASSERT_TRUE(column_object.getSubcolumns().size() == 4); + + auto check_subcolumn = [&](const auto & name, const auto & type_name, const std::vector & expected) + { + const auto & subcolumn = column_object.getSubcolumn(PathInData{name}); + ASSERT_EQ(subcolumn.getLeastCommonType()->getName(), type_name); + + const auto & data = subcolumn.getFinalizedColumn(); + for (size_t i = 0; i < expected.size(); ++i) + ASSERT_EQ( + applyVisitor(FieldVisitorToString(), data[i]), + applyVisitor(FieldVisitorToString(), expected[i])); + }; + + check_subcolumn("k1", "Int8", {1, 2}); + check_subcolumn("k2.k3", "Array(String)", {Array{"aa", "bb"}, Array{"cc", "", ""}}); + check_subcolumn("k2.k4", "Array(Int8)", {Array{2, 3}, Array{0, 5, 6}}); + check_subcolumn("k2.k5", "Array(Int8)", {Array{0, 0}, Array{4, 0, 0}}); +} + +#endif diff --git a/src/DataTypes/Serializations/tests/gtest_object_serialization.cpp b/src/DataTypes/Serializations/tests/gtest_object_serialization.cpp index c6337a31fce..f104b75af9b 100644 --- a/src/DataTypes/Serializations/tests/gtest_object_serialization.cpp +++ b/src/DataTypes/Serializations/tests/gtest_object_serialization.cpp @@ -1,80 +1,98 @@ -#include -#include -#include -#include -#include -#include #include -#include -#include +#include +#include +#include #include -#if USE_SIMDJSON - using namespace DB; -TEST(SerializationObject, FromString) +TEST(ObjectSerialization, FieldBinarySerialization) { - WriteBufferFromOwnString out; - - auto column_string = ColumnString::create(); - column_string->insert(R"({"k1" : 1, "k2" : [{"k3" : "aa", "k4" : 2}, {"k3": "bb", "k4": 3}]})"); - column_string->insert(R"({"k1" : 2, "k2" : [{"k3" : "cc", "k5" : 4}, {"k4": 5}, {"k4": 6}]})"); - - { - auto serialization = std::make_shared(); - - ISerialization::SerializeBinaryBulkSettings settings; - ISerialization::SerializeBinaryBulkStatePtr state; - settings.position_independent_encoding = false; - settings.getter = [&out](const auto &) { return &out; }; - - writeIntBinary(static_cast(1), out); - serialization->serializeBinaryBulkStatePrefix(*column_string, settings, state); - serialization->serializeBinaryBulkWithMultipleStreams(*column_string, 0, column_string->size(), settings, state); - serialization->serializeBinaryBulkStateSuffix(settings, state); - } - - auto type_object = std::make_shared("json", false); - ColumnPtr result_column = type_object->createColumn(); - - ReadBufferFromOwnString in(out.str()); - - { - auto serialization = type_object->getDefaultSerialization(); - - ISerialization::DeserializeBinaryBulkSettings settings; - ISerialization::DeserializeBinaryBulkStatePtr state; - settings.position_independent_encoding = false; - settings.getter = [&in](const auto &) { return ∈ }; - - serialization->deserializeBinaryBulkStatePrefix(settings, state, nullptr); - serialization->deserializeBinaryBulkWithMultipleStreams(result_column, column_string->size(), settings, state, nullptr); - } - - auto & column_object = assert_cast(*result_column->assumeMutable()); - column_object.finalize(); - - ASSERT_TRUE(column_object.size() == 2); - ASSERT_TRUE(column_object.getSubcolumns().size() == 4); - - auto check_subcolumn = [&](const auto & name, const auto & type_name, const std::vector & expected) - { - const auto & subcolumn = column_object.getSubcolumn(PathInData{name}); - ASSERT_EQ(subcolumn.getLeastCommonType()->getName(), type_name); - - const auto & data = subcolumn.getFinalizedColumn(); - for (size_t i = 0; i < expected.size(); ++i) - ASSERT_EQ( - applyVisitor(FieldVisitorToString(), data[i]), - applyVisitor(FieldVisitorToString(), expected[i])); - }; - - check_subcolumn("k1", "Int8", {1, 2}); - check_subcolumn("k2.k3", "Array(String)", {Array{"aa", "bb"}, Array{"cc", "", ""}}); - check_subcolumn("k2.k4", "Array(Int8)", {Array{2, 3}, Array{0, 5, 6}}); - check_subcolumn("k2.k5", "Array(Int8)", {Array{0, 0}, Array{4, 0, 0}}); + auto type = DataTypeFactory::instance().get("JSON(max_dynamic_types=10, max_dynamic_paths=2, a.b UInt32, a.c Array(String))"); + auto serialization = type->getDefaultSerialization(); + Object object1 = Object{{"a.c", Array{"Str1", "Str2"}}, {"a.d", Field(42)}, {"a.e", Tuple{Field(43), "Str3"}}}; + WriteBufferFromOwnString ostr; + serialization->serializeBinary(object1, ostr, FormatSettings()); + ReadBufferFromString istr(ostr.str()); + Field object2; + serialization->deserializeBinary(object2, istr, FormatSettings()); + ASSERT_EQ(object1, object2.safeGet()); } -#endif + +TEST(ObjectSerialization, ColumnBinarySerialization) +{ + auto type = DataTypeFactory::instance().get("JSON(max_dynamic_types=10, max_dynamic_paths=2, a.b UInt32, a.c Array(String))"); + auto serialization = type->getDefaultSerialization(); + auto col = type->createColumn(); + auto & col_object = assert_cast(*col); + col_object.insert(Object{{"a.c", Array{"Str1", "Str2"}}, {"a.d", Field(42)}, {"a.e", Tuple{Field(43), "Str3"}}}); + WriteBufferFromOwnString ostr1; + serialization->serializeBinary(col_object, 0, ostr1, FormatSettings()); + ReadBufferFromString istr1(ostr1.str()); + serialization->deserializeBinary(col_object, istr1, FormatSettings()); + ASSERT_EQ(col_object[0], col_object[1]); + col_object.insert(Object{{"a.c", Array{"Str1", "Str2"}}, {"a.e", Field(42)}, {"b.d", Field(42)}, {"b.e", Tuple{Field(43), "Str3"}}, {"b.g", Field("Str4")}}); + WriteBufferFromOwnString ostr2; + serialization->serializeBinary(col_object, 2, ostr2, FormatSettings()); + ReadBufferFromString istr2(ostr2.str()); + serialization->deserializeBinary(col_object, istr2, FormatSettings()); + ASSERT_EQ(col_object[2], col_object[3]); +} + +TEST(ObjectSerialization, JSONSerialization) +{ + auto type = DataTypeFactory::instance().get("JSON(max_dynamic_types=10, max_dynamic_paths=2, a.b UInt32, a.c Array(String))"); + auto serialization = type->getDefaultSerialization(); + auto col = type->createColumn(); + auto & col_object = assert_cast(*col); + col_object.insert(Object{{"a.c", Array{"Str1", "Str2"}}, {"a.d", Field(42)}, {"a.e", Tuple{Field(43), "Str3"}}}); + col_object.insert(Object{{"a.c", Array{"Str1", "Str2"}}, {"a", Tuple{Field(43), "Str3"}}, {"a.b.c", Field(42)}, {"a.b.e", Field(43)}, {"b.c.d.e", Field(42)}, {"b.c.d.g", Field(43)}, {"b.c.h.r", Field(44)}, {"c.g.h.t", Array{Field("Str"), Field("Str2")}}, {"h", Field("Str")}, {"j", Field("Str")}}); + WriteBufferFromOwnString buf1; + serialization->serializeTextJSON(col_object, 1, buf1, FormatSettings()); + ASSERT_EQ(buf1.str(), R"({"a":[43,"Str3"],"a":{"b":0,"b":{"c":42,"e":43},"c":["Str1","Str2"]},"b":{"c":{"d":{"e":42,"g":43},"h":{"r":44}}},"c":{"g":{"h":{"t":["Str","Str2"]}}},"h":"Str","j":"Str"})"); + WriteBufferFromOwnString buf2; + serialization->serializeTextJSONPretty(col_object, 1, buf2, FormatSettings(), 0); + ASSERT_EQ(buf2.str(), R"({ + "a" : [ + 43, + "Str3" + ], + "a" : { + "b" : 0, + "b" : { + "c" : 42, + "e" : 43 + }, + "c" : [ + "Str1", + "Str2" + ] + }, + "b" : { + "c" : { + "d" : { + "e" : 42, + "g" : 43 + }, + "h" : { + "r" : 44 + } + } + }, + "c" : { + "g" : { + "h" : { + "t" : [ + "Str", + "Str2" + ] + } + } + }, + "h" : "Str", + "j" : "Str" +})"); + +} diff --git a/src/DataTypes/Utils.cpp b/src/DataTypes/Utils.cpp index e7e69e379af..a6e9452d7ef 100644 --- a/src/DataTypes/Utils.cpp +++ b/src/DataTypes/Utils.cpp @@ -216,6 +216,7 @@ bool canBeSafelyCasted(const DataTypePtr & from_type, const DataTypePtr & to_typ return false; } case TypeIndex::String: + case TypeIndex::ObjectDeprecated: case TypeIndex::Object: case TypeIndex::Set: case TypeIndex::Interval: diff --git a/src/DataTypes/fuzzers/CMakeLists.txt b/src/DataTypes/fuzzers/CMakeLists.txt index bc640358673..e54ef0a860c 100644 --- a/src/DataTypes/fuzzers/CMakeLists.txt +++ b/src/DataTypes/fuzzers/CMakeLists.txt @@ -1,2 +1,2 @@ clickhouse_add_executable(data_type_deserialization_fuzzer data_type_deserialization_fuzzer.cpp ${SRCS}) -target_link_libraries(data_type_deserialization_fuzzer PRIVATE dbms clickhouse_aggregate_functions clickhouse_functions) +target_link_libraries(data_type_deserialization_fuzzer PRIVATE clickhouse_functions clickhouse_aggregate_functions) diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp index a71b19d6c92..65df529e78b 100644 --- a/src/DataTypes/getLeastSupertype.cpp +++ b/src/DataTypes/getLeastSupertype.cpp @@ -228,6 +228,39 @@ void convertUInt64toInt64IfPossible(const DataTypes & types, TypeIndexSet & type } } +DataTypePtr findSmallestIntervalSuperType(const DataTypes &types, TypeIndexSet &types_set) +{ + auto min_interval = IntervalKind::Kind::Year; + DataTypePtr smallest_type; + + bool is_higher_interval = false; // For Years, Quarters and Months + + for (const auto &type : types) + { + if (const auto * interval_type = typeid_cast(type.get())) + { + auto current_interval = interval_type->getKind().kind; + if (current_interval > IntervalKind::Kind::Week) + is_higher_interval = true; + if (current_interval < min_interval) + { + min_interval = current_interval; + smallest_type = type; + } + } + } + + if (is_higher_interval && min_interval <= IntervalKind::Kind::Week) + throw Exception(ErrorCodes::NO_COMMON_TYPE, "Cannot compare intervals {} and {} because the number of days in a month is not fixed", types[0]->getName(), types[1]->getName()); + + if (smallest_type) + { + types_set.clear(); + types_set.insert(smallest_type->getTypeId()); + } + + return smallest_type; +} } template @@ -652,6 +685,13 @@ DataTypePtr getLeastSupertype(const DataTypes & types) return numeric_type; } + /// For interval data types. + { + auto res = findSmallestIntervalSuperType(types, type_ids); + if (res) + return res; + } + /// All other data types (UUID, AggregateFunction, Enum...) are compatible only if they are the same (checked in trivial cases). return throwOrReturn(types, "", ErrorCodes::NO_COMMON_TYPE); } diff --git a/src/DataTypes/getLeastSupertype.h b/src/DataTypes/getLeastSupertype.h index 2ae1e52ca96..55d8e8fff0d 100644 --- a/src/DataTypes/getLeastSupertype.h +++ b/src/DataTypes/getLeastSupertype.h @@ -1,5 +1,7 @@ #pragma once #include +#include +#include namespace DB { @@ -48,4 +50,7 @@ DataTypePtr getLeastSupertypeOrString(const TypeIndexSet & types); DataTypePtr tryGetLeastSupertype(const TypeIndexSet & types); +/// A vector that shows the conversion rates to the next Interval type starting from NanoSecond +static std::vector interval_conversions = {1, 1000, 1000, 1000, 60, 60, 24, 7, 4, 3, 4}; + } diff --git a/src/DataTypes/registerDataTypeDateTime.cpp b/src/DataTypes/registerDataTypeDateTime.cpp index 84a52d4affb..9a632bd381b 100644 --- a/src/DataTypes/registerDataTypeDateTime.cpp +++ b/src/DataTypes/registerDataTypeDateTime.cpp @@ -55,7 +55,7 @@ getArgument(const ASTPtr & arguments, size_t argument_index, const char * argume } } - return argument->value.get(); + return argument->value.safeGet(); } static DataTypePtr create(const ASTPtr & arguments) diff --git a/src/DataTypes/tests/gtest_data_types_binary_encoding.cpp b/src/DataTypes/tests/gtest_data_types_binary_encoding.cpp index 4d0bfc67183..789aeac566f 100644 --- a/src/DataTypes/tests/gtest_data_types_binary_encoding.cpp +++ b/src/DataTypes/tests/gtest_data_types_binary_encoding.cpp @@ -126,4 +126,7 @@ GTEST_TEST(DataTypesBinaryEncoding, EncodeAndDecode) check(DataTypeFactory::instance().get("Polygon")); check(DataTypeFactory::instance().get("MultiPolygon")); check(DataTypeFactory::instance().get("Tuple(Map(LowCardinality(String), Array(AggregateFunction(2, quantiles(0.1, 0.2), Float32))), Array(Array(Tuple(UInt32, Tuple(a Map(String, String), b Nullable(Date), c Variant(Tuple(g String, d Array(UInt32)), Date, Map(String, String)))))))")); + check(DataTypeFactory::instance().get("JSON")); + check(DataTypeFactory::instance().get("JSON(max_dynamic_paths=10)")); + check(DataTypeFactory::instance().get("JSON(max_dynamic_paths=10, max_dynamic_types=10, a.b.c UInt32, SKIP a.c, b.g String, SKIP l.d.f)")); } diff --git a/src/Databases/DDLLoadingDependencyVisitor.cpp b/src/Databases/DDLLoadingDependencyVisitor.cpp index 67bce915168..b91aa84ecd3 100644 --- a/src/Databases/DDLLoadingDependencyVisitor.cpp +++ b/src/Databases/DDLLoadingDependencyVisitor.cpp @@ -183,7 +183,7 @@ void DDLLoadingDependencyVisitor::extractTableNameFromArgument(const ASTFunction if (name->value.getType() != Field::Types::String) return; - auto maybe_qualified_name = QualifiedTableName::tryParseFromString(name->value.get()); + auto maybe_qualified_name = QualifiedTableName::tryParseFromString(name->value.safeGet()); if (!maybe_qualified_name) return; @@ -194,7 +194,7 @@ void DDLLoadingDependencyVisitor::extractTableNameFromArgument(const ASTFunction if (literal->value.getType() != Field::Types::String) return; - auto maybe_qualified_name = QualifiedTableName::tryParseFromString(literal->value.get()); + auto maybe_qualified_name = QualifiedTableName::tryParseFromString(literal->value.safeGet()); /// Just return if name if invalid if (!maybe_qualified_name) return; diff --git a/src/Databases/DDLRenamingVisitor.cpp b/src/Databases/DDLRenamingVisitor.cpp index 38e100e2470..7556223b30e 100644 --- a/src/Databases/DDLRenamingVisitor.cpp +++ b/src/Databases/DDLRenamingVisitor.cpp @@ -180,7 +180,7 @@ namespace if (database_name_field && table_name_field) { - QualifiedTableName qualified_name{database_name_field->get(), table_name_field->get()}; + QualifiedTableName qualified_name{database_name_field->safeGet(), table_name_field->safeGet()}; if (!qualified_name.database.empty() && !qualified_name.table.empty()) { auto new_qualified_name = data.renaming_map.getNewTableName(qualified_name); @@ -207,7 +207,7 @@ namespace if (literal->value.getType() != Field::Types::String) return; - auto maybe_qualified_name = QualifiedTableName::tryParseFromString(literal->value.get()); + auto maybe_qualified_name = QualifiedTableName::tryParseFromString(literal->value.safeGet()); /// Just return if name if invalid if (!maybe_qualified_name || maybe_qualified_name->database.empty() || maybe_qualified_name->table.empty()) return; @@ -247,7 +247,7 @@ namespace if (!literal || (literal->value.getType() != Field::Types::String)) return; - auto database_name = literal->value.get(); + auto database_name = literal->value.safeGet(); if (database_name.empty()) return; diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index 3fb6d30fcb8..2ccdd8510a8 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -52,7 +52,7 @@ DatabaseLazy::DatabaseLazy(const String & name_, const String & metadata_path_, void DatabaseLazy::loadStoredObjects(ContextMutablePtr local_context, LoadingStrictnessLevel /*mode*/) { - iterateMetadataFiles(local_context, [this, &local_context](const String & file_name) + iterateMetadataFiles([this, &local_context](const String & file_name) { const std::string table_name = unescapeForFileName(file_name.substr(0, file_name.size() - 4)); diff --git a/src/Databases/DatabaseLazy.h b/src/Databases/DatabaseLazy.h index 41cfb751141..aeac130594f 100644 --- a/src/Databases/DatabaseLazy.h +++ b/src/Databases/DatabaseLazy.h @@ -12,7 +12,7 @@ class DatabaseLazyIterator; class Context; /** Lazy engine of databases. - * Works like DatabaseOrdinary, but stores in memory only the cache. + * Works like DatabaseOrdinary, but stores only recently accessed tables in memory. * Can be used only with *Log engines. */ class DatabaseLazy final : public DatabaseOnDisk diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index 734f354d9a5..81378fc1c64 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -504,7 +504,7 @@ void DatabaseOnDisk::renameTable( } -/// It returns create table statement (even if table is detached) +/// It returns the create table statement (even if table is detached) ASTPtr DatabaseOnDisk::getCreateTableQueryImpl(const String & table_name, ContextPtr, bool throw_on_error) const { ASTPtr ast; @@ -568,14 +568,14 @@ void DatabaseOnDisk::drop(ContextPtr local_context) assert(TSA_SUPPRESS_WARNING_FOR_READ(tables).empty()); if (local_context->getSettingsRef().force_remove_data_recursively_on_drop) { - (void)fs::remove_all(local_context->getPath() + getDataPath()); + (void)fs::remove_all(std::filesystem::path(getContext()->getPath()) / data_path); (void)fs::remove_all(getMetadataPath()); } else { try { - (void)fs::remove(local_context->getPath() + getDataPath()); + (void)fs::remove(std::filesystem::path(getContext()->getPath()) / data_path); (void)fs::remove(getMetadataPath()); } catch (const fs::filesystem_error & e) @@ -613,7 +613,7 @@ time_t DatabaseOnDisk::getObjectMetadataModificationTime(const String & object_n } } -void DatabaseOnDisk::iterateMetadataFiles(ContextPtr local_context, const IteratingFunction & process_metadata_file) const +void DatabaseOnDisk::iterateMetadataFiles(const IteratingFunction & process_metadata_file) const { auto process_tmp_drop_metadata_file = [&](const String & file_name) { @@ -621,7 +621,7 @@ void DatabaseOnDisk::iterateMetadataFiles(ContextPtr local_context, const Iterat static const char * tmp_drop_ext = ".sql.tmp_drop"; const std::string object_name = file_name.substr(0, file_name.size() - strlen(tmp_drop_ext)); - if (fs::exists(local_context->getPath() + getDataPath() + '/' + object_name)) + if (fs::exists(std::filesystem::path(getContext()->getPath()) / data_path / object_name)) { fs::rename(getMetadataPath() + file_name, getMetadataPath() + object_name + ".sql"); LOG_WARNING(log, "Object {} was not dropped previously and will be restored", backQuote(object_name)); @@ -638,7 +638,7 @@ void DatabaseOnDisk::iterateMetadataFiles(ContextPtr local_context, const Iterat std::vector> metadata_files; fs::directory_iterator dir_end; - for (fs::directory_iterator dir_it(getMetadataPath()); dir_it != dir_end; ++dir_it) + for (fs::directory_iterator dir_it(metadata_path); dir_it != dir_end; ++dir_it) { String file_name = dir_it->path().filename(); /// For '.svn', '.gitignore' directory and similar. diff --git a/src/Databases/DatabaseOnDisk.h b/src/Databases/DatabaseOnDisk.h index 12656068643..ffc95a7c128 100644 --- a/src/Databases/DatabaseOnDisk.h +++ b/src/Databases/DatabaseOnDisk.h @@ -64,7 +64,7 @@ public: time_t getObjectMetadataModificationTime(const String & object_name) const override; String getDataPath() const override { return data_path; } - String getTableDataPath(const String & table_name) const override { return data_path + escapeForFileName(table_name) + "/"; } + String getTableDataPath(const String & table_name) const override { return std::filesystem::path(data_path) / escapeForFileName(table_name) / ""; } String getTableDataPath(const ASTCreateQuery & query) const override { return getTableDataPath(query.getTable()); } String getMetadataPath() const override { return metadata_path; } @@ -83,7 +83,7 @@ protected: using IteratingFunction = std::function; - void iterateMetadataFiles(ContextPtr context, const IteratingFunction & process_metadata_file) const; + void iterateMetadataFiles(const IteratingFunction & process_metadata_file) const; ASTPtr getCreateTableQueryImpl( const String & table_name, diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index 8808261654f..dd8a3f42ea8 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -55,7 +55,7 @@ static constexpr size_t METADATA_FILE_BUFFER_SIZE = 32768; static constexpr const char * const CONVERT_TO_REPLICATED_FLAG_NAME = "convert_to_replicated"; DatabaseOrdinary::DatabaseOrdinary(const String & name_, const String & metadata_path_, ContextPtr context_) - : DatabaseOrdinary(name_, metadata_path_, "data/" + escapeForFileName(name_) + "/", "DatabaseOrdinary (" + name_ + ")", context_) + : DatabaseOrdinary(name_, metadata_path_, std::filesystem::path("data") / escapeForFileName(name_) / "", "DatabaseOrdinary (" + name_ + ")", context_) { } @@ -265,7 +265,7 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables } }; - iterateMetadataFiles(local_context, process_metadata); + iterateMetadataFiles(process_metadata); size_t objects_in_database = metadata.parsed_tables.size() - prev_tables_count; size_t dictionaries_in_database = metadata.total_dictionaries - prev_total_dictionaries; diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index f127ccbc224..8e3378bcc12 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -20,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -338,9 +340,12 @@ ClusterPtr DatabaseReplicated::getClusterImpl(bool all_groups) const return std::make_shared(getContext()->getSettingsRef(), shards, params); } -std::vector DatabaseReplicated::tryGetAreReplicasActive(const ClusterPtr & cluster_) const +ReplicasInfo DatabaseReplicated::tryGetReplicasInfo(const ClusterPtr & cluster_) const { Strings paths; + + paths.emplace_back(fs::path(zookeeper_path) / "max_log_ptr"); + const auto & addresses_with_failover = cluster_->getShardsAddresses(); const auto & shards_info = cluster_->getShardsInfo(); for (size_t shard_index = 0; shard_index < shards_info.size(); ++shard_index) @@ -349,22 +354,50 @@ std::vector DatabaseReplicated::tryGetAreReplicasActive(const ClusterPtr { String full_name = getFullReplicaName(replica.database_shard_name, replica.database_replica_name); paths.emplace_back(fs::path(zookeeper_path) / "replicas" / full_name / "active"); + paths.emplace_back(fs::path(zookeeper_path) / "replicas" / full_name / "log_ptr"); } } try { auto current_zookeeper = getZooKeeper(); - auto res = current_zookeeper->exists(paths); + auto zk_res = current_zookeeper->tryGet(paths); - std::vector statuses; - statuses.resize(paths.size()); + auto max_log_ptr_zk = zk_res[0]; + if (max_log_ptr_zk.error != Coordination::Error::ZOK) + throw Coordination::Exception(max_log_ptr_zk.error); - for (size_t i = 0; i < res.size(); ++i) - if (res[i].error == Coordination::Error::ZOK) - statuses[i] = 1; + UInt32 max_log_ptr = parse(max_log_ptr_zk.data); - return statuses; + ReplicasInfo replicas_info; + replicas_info.resize((zk_res.size() - 1) / 2); + + size_t global_replica_index = 0; + for (size_t shard_index = 0; shard_index < shards_info.size(); ++shard_index) + { + for (const auto & replica : addresses_with_failover[shard_index]) + { + auto replica_active = zk_res[2 * global_replica_index + 1]; + auto replica_log_ptr = zk_res[2 * global_replica_index + 2]; + + UInt64 recovery_time = 0; + { + std::lock_guard lock(ddl_worker_mutex); + if (replica.is_local && ddl_worker) + recovery_time = ddl_worker->getCurrentInitializationDurationMs(); + } + + replicas_info[global_replica_index] = ReplicaInfo{ + .is_active = replica_active.error == Coordination::Error::ZOK, + .replication_lag = replica_log_ptr.error != Coordination::Error::ZNONODE ? std::optional(max_log_ptr - parse(replica_log_ptr.data)) : std::nullopt, + .recovery_time = recovery_time, + }; + + ++global_replica_index; + } + } + + return replicas_info; } catch (...) { @@ -373,7 +406,6 @@ std::vector DatabaseReplicated::tryGetAreReplicasActive(const ClusterPtr } } - void DatabaseReplicated::fillClusterAuthInfo(String collection_name, const Poco::Util::AbstractConfiguration & config_ref) { const auto & config_prefix = fmt::format("named_collections.{}", collection_name); @@ -802,8 +834,8 @@ void DatabaseReplicated::checkTableEngine(const ASTCreateQuery & query, ASTStora if (!arg1 || !arg2 || arg1->value.getType() != Field::Types::String || arg2->value.getType() != Field::Types::String) return; - String maybe_path = arg1->value.get(); - String maybe_replica = arg2->value.get(); + String maybe_path = arg1->value.safeGet(); + String maybe_replica = arg2->value.safeGet(); /// Looks like it's ReplicatedMergeTree with explicit zookeeper_path and replica_name arguments. /// Let's ensure that some macros are used. @@ -1111,39 +1143,7 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep /// We will execute some CREATE queries for recovery (not ATTACH queries), /// so we need to allow experimental features that can be used in a CREATE query - query_context->setSetting("allow_experimental_inverted_index", 1); - query_context->setSetting("allow_experimental_full_text_index", 1); - query_context->setSetting("allow_experimental_codecs", 1); - query_context->setSetting("allow_experimental_live_view", 1); - query_context->setSetting("allow_experimental_window_view", 1); - query_context->setSetting("allow_experimental_funnel_functions", 1); - query_context->setSetting("allow_experimental_nlp_functions", 1); - query_context->setSetting("allow_experimental_hash_functions", 1); - query_context->setSetting("allow_experimental_object_type", 1); - query_context->setSetting("allow_experimental_variant_type", 1); - query_context->setSetting("allow_experimental_dynamic_type", 1); - query_context->setSetting("allow_experimental_annoy_index", 1); - query_context->setSetting("allow_experimental_usearch_index", 1); - query_context->setSetting("allow_experimental_bigint_types", 1); - query_context->setSetting("allow_experimental_window_functions", 1); - query_context->setSetting("allow_experimental_geo_types", 1); - query_context->setSetting("allow_experimental_map_type", 1); - query_context->setSetting("allow_deprecated_error_prone_window_functions", 1); - - query_context->setSetting("allow_suspicious_low_cardinality_types", 1); - query_context->setSetting("allow_suspicious_fixed_string_types", 1); - query_context->setSetting("allow_suspicious_indices", 1); - query_context->setSetting("allow_suspicious_codecs", 1); - query_context->setSetting("allow_hyperscan", 1); - query_context->setSetting("allow_simdjson", 1); - query_context->setSetting("allow_deprecated_syntax_for_merge_tree", 1); - query_context->setSetting("allow_suspicious_primary_key", 1); - query_context->setSetting("allow_suspicious_ttl_expressions", 1); - query_context->setSetting("allow_suspicious_variant_types", 1); - query_context->setSetting("enable_deflate_qpl_codec", 1); - query_context->setSetting("enable_zstd_qat_codec", 1); - query_context->setSetting("allow_create_index_without_type", 1); - query_context->setSetting("allow_experimental_s3queue", 1); + enableAllExperimentalSettings(query_context); auto txn = std::make_shared(current_zookeeper, zookeeper_path, false, ""); query_context->initZooKeeperMetadataTransaction(txn); @@ -1553,6 +1553,8 @@ void DatabaseReplicated::dropTable(ContextPtr local_context, const String & tabl } auto table = tryGetTable(table_name, getContext()); + if (!table) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Table {} doesn't exist", table_name); if (table->getName() == "MaterializedView" || table->getName() == "WindowView") { /// Avoid recursive locking of metadata_mutex diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h index 27ab262d1f1..db683be8f36 100644 --- a/src/Databases/DatabaseReplicated.h +++ b/src/Databases/DatabaseReplicated.h @@ -1,5 +1,7 @@ #pragma once +#include + #include #include #include @@ -17,6 +19,14 @@ using ZooKeeperPtr = std::shared_ptr; class Cluster; using ClusterPtr = std::shared_ptr; +struct ReplicaInfo +{ + bool is_active; + std::optional replication_lag; + UInt64 recovery_time; +}; +using ReplicasInfo = std::vector; + class DatabaseReplicated : public DatabaseAtomic { public: @@ -84,7 +94,7 @@ public: static void dropReplica(DatabaseReplicated * database, const String & database_zookeeper_path, const String & shard, const String & replica, bool throw_if_noop); - std::vector tryGetAreReplicasActive(const ClusterPtr & cluster_) const; + ReplicasInfo tryGetReplicasInfo(const ClusterPtr & cluster_) const; void renameDatabase(ContextPtr query_context, const String & new_name) override; diff --git a/src/Databases/DatabaseReplicatedWorker.cpp b/src/Databases/DatabaseReplicatedWorker.cpp index 1ef88dc03bc..4e7408aa96e 100644 --- a/src/Databases/DatabaseReplicatedWorker.cpp +++ b/src/Databases/DatabaseReplicatedWorker.cpp @@ -32,6 +32,12 @@ DatabaseReplicatedDDLWorker::DatabaseReplicatedDDLWorker(DatabaseReplicated * db bool DatabaseReplicatedDDLWorker::initializeMainThread() { + { + std::lock_guard lock(initialization_duration_timer_mutex); + initialization_duration_timer.emplace(); + initialization_duration_timer->start(); + } + while (!stop_flag) { try @@ -69,6 +75,10 @@ bool DatabaseReplicatedDDLWorker::initializeMainThread() initializeReplication(); initialized = true; + { + std::lock_guard lock(initialization_duration_timer_mutex); + initialization_duration_timer.reset(); + } return true; } catch (...) @@ -78,6 +88,11 @@ bool DatabaseReplicatedDDLWorker::initializeMainThread() } } + { + std::lock_guard lock(initialization_duration_timer_mutex); + initialization_duration_timer.reset(); + } + return false; } @@ -459,4 +474,10 @@ UInt32 DatabaseReplicatedDDLWorker::getLogPointer() const return max_id.load(); } +UInt64 DatabaseReplicatedDDLWorker::getCurrentInitializationDurationMs() const +{ + std::lock_guard lock(initialization_duration_timer_mutex); + return initialization_duration_timer ? initialization_duration_timer->elapsedMilliseconds() : 0; +} + } diff --git a/src/Databases/DatabaseReplicatedWorker.h b/src/Databases/DatabaseReplicatedWorker.h index 41edf2221b8..2309c831839 100644 --- a/src/Databases/DatabaseReplicatedWorker.h +++ b/src/Databases/DatabaseReplicatedWorker.h @@ -36,6 +36,8 @@ public: DatabaseReplicated * const database, bool committed = false); /// NOLINT UInt32 getLogPointer() const; + + UInt64 getCurrentInitializationDurationMs() const; private: bool initializeMainThread() override; void initializeReplication(); @@ -56,6 +58,9 @@ private: ZooKeeperPtr active_node_holder_zookeeper; /// It will remove "active" node when database is detached zkutil::EphemeralNodeHolderPtr active_node_holder; + + std::optional initialization_duration_timer; + mutable std::mutex initialization_duration_timer_mutex; }; } diff --git a/src/Databases/DatabasesOverlay.cpp b/src/Databases/DatabasesOverlay.cpp index 801356b3dd7..495733e15fd 100644 --- a/src/Databases/DatabasesOverlay.cpp +++ b/src/Databases/DatabasesOverlay.cpp @@ -14,6 +14,8 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int CANNOT_GET_CREATE_TABLE_QUERY; + extern const int BAD_ARGUMENTS; + extern const int UNKNOWN_TABLE; } DatabasesOverlay::DatabasesOverlay(const String & name_, ContextPtr context_) @@ -124,6 +126,39 @@ StoragePtr DatabasesOverlay::detachTable(ContextPtr context_, const String & tab getEngineName()); } +void DatabasesOverlay::renameTable( + ContextPtr current_context, + const String & name, + IDatabase & to_database, + const String & to_name, + bool exchange, + bool dictionary) +{ + for (auto & db : databases) + { + if (db->isTableExist(name, current_context)) + { + if (DatabasesOverlay * to_overlay_database = typeid_cast(&to_database)) + { + /// Renaming from Overlay database inside itself or into another Overlay database. + /// Just use the first database in the overlay as a destination. + if (to_overlay_database->databases.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The destination Overlay database {} does not have any members", to_database.getDatabaseName()); + + db->renameTable(current_context, name, *to_overlay_database->databases[0], to_name, exchange, dictionary); + } + else + { + /// Renaming into a different type of database. E.g. from Overlay on top of Atomic database into just Atomic database. + db->renameTable(current_context, name, to_database, to_name, exchange, dictionary); + } + + return; + } + } + throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {}.{} doesn't exist", backQuote(getDatabaseName()), backQuote(name)); +} + ASTPtr DatabasesOverlay::getCreateTableQueryImpl(const String & name, ContextPtr context_, bool throw_on_error) const { ASTPtr result = nullptr; @@ -178,6 +213,18 @@ String DatabasesOverlay::getTableDataPath(const ASTCreateQuery & query) const return result; } +UUID DatabasesOverlay::getUUID() const +{ + UUID result = UUIDHelpers::Nil; + for (const auto & db : databases) + { + result = db->getUUID(); + if (result != UUIDHelpers::Nil) + break; + } + return result; +} + UUID DatabasesOverlay::tryGetTableUUID(const String & table_name) const { UUID result = UUIDHelpers::Nil; diff --git a/src/Databases/DatabasesOverlay.h b/src/Databases/DatabasesOverlay.h index b0c7e7e4032..40c653e5cb5 100644 --- a/src/Databases/DatabasesOverlay.h +++ b/src/Databases/DatabasesOverlay.h @@ -35,12 +35,21 @@ public: StoragePtr detachTable(ContextPtr context, const String & table_name) override; + void renameTable( + ContextPtr current_context, + const String & name, + IDatabase & to_database, + const String & to_name, + bool exchange, + bool dictionary) override; + ASTPtr getCreateTableQueryImpl(const String & name, ContextPtr context, bool throw_on_error) const override; ASTPtr getCreateDatabaseQuery() const override; String getTableDataPath(const String & table_name) const override; String getTableDataPath(const ASTCreateQuery & query) const override; + UUID getUUID() const override; UUID tryGetTableUUID(const String & table_name) const override; void drop(ContextPtr context) override; diff --git a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp index 04b4070d5af..1364e9ae2b2 100644 --- a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp +++ b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp @@ -736,11 +736,11 @@ static void writeFieldsToColumn( { for (size_t index = 0; index < rows_data.size(); ++index) { - const Tuple & row_data = rows_data[index].get(); + const Tuple & row_data = rows_data[index].safeGet(); const Field & value = row_data[column_index]; if (write_data_to_null_map(value, index)) - casted_column->insertValue(static_cast(value.template get())); + casted_column->insertValue(static_cast(value.template safeGet())); } }; @@ -776,17 +776,17 @@ static void writeFieldsToColumn( { for (size_t index = 0; index < rows_data.size(); ++index) { - const Tuple & row_data = rows_data[index].get(); + const Tuple & row_data = rows_data[index].safeGet(); const Field & value = row_data[column_index]; if (write_data_to_null_map(value, index)) { if (value.getType() == Field::Types::UInt64) - casted_int32_column->insertValue(static_cast(value.get())); + casted_int32_column->insertValue(static_cast(value.safeGet())); else if (value.getType() == Field::Types::Int64) { /// For MYSQL_TYPE_INT24 - const Int32 & num = static_cast(value.get()); + const Int32 & num = static_cast(value.safeGet()); casted_int32_column->insertValue(num & 0x800000 ? num | 0xFF000000 : num); } else @@ -798,7 +798,7 @@ static void writeFieldsToColumn( { for (size_t index = 0; index < rows_data.size(); ++index) { - const Tuple & row_data = rows_data[index].get(); + const Tuple & row_data = rows_data[index].safeGet(); const Field & value = row_data[column_index]; if (write_data_to_null_map(value, index)) @@ -812,12 +812,12 @@ static void writeFieldsToColumn( { for (size_t index = 0; index < rows_data.size(); ++index) { - const Tuple & row_data = rows_data[index].get(); + const Tuple & row_data = rows_data[index].safeGet(); const Field & value = row_data[column_index]; if (write_data_to_null_map(value, index)) { - const String & data = value.get(); + const String & data = value.safeGet(); casted_fixed_string_column->insertData(data.data(), data.size()); } } @@ -864,7 +864,7 @@ static inline size_t onUpdateData(const Row & rows_data, Block & buffer, size_t { writeable_rows_mask[index + 1] = true; writeable_rows_mask[index] = differenceSortingKeys( - rows_data[index].get(), rows_data[index + 1].get(), sorting_columns_index); + rows_data[index].safeGet(), rows_data[index + 1].safeGet(), sorting_columns_index); } for (size_t column = 0; column < buffer.columns() - 2; ++column) diff --git a/src/Databases/MySQL/tests/gtest_mysql_binlog.cpp b/src/Databases/MySQL/tests/gtest_mysql_binlog.cpp index 11299c5b8b1..6f1ba26ee33 100644 --- a/src/Databases/MySQL/tests/gtest_mysql_binlog.cpp +++ b/src/Databases/MySQL/tests/gtest_mysql_binlog.cpp @@ -281,12 +281,12 @@ static void testFile1(IBinlog & binlog, UInt64 timeout, bool filtered = false) ASSERT_EQ(write_event->table, "a"); ASSERT_EQ(write_event->rows.size(), 1); ASSERT_EQ(write_event->rows[0].getType(), Field::Types::Tuple); - auto row_data = write_event->rows[0].get(); + auto row_data = write_event->rows[0].safeGet(); ASSERT_EQ(row_data.size(), 4u); - ASSERT_EQ(row_data[0].get(), 1u); - ASSERT_EQ(row_data[1].get(), 1u); - ASSERT_EQ(row_data[2].get(), 1u); - ASSERT_EQ(row_data[3].get(), 1u); + ASSERT_EQ(row_data[0].safeGet(), 1u); + ASSERT_EQ(row_data[1].safeGet(), 1u); + ASSERT_EQ(row_data[2].safeGet(), 1u); + ASSERT_EQ(row_data[3].safeGet(), 1u); ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); ++count; @@ -342,18 +342,18 @@ static void testFile1(IBinlog & binlog, UInt64 timeout, bool filtered = false) ASSERT_EQ(update_event->table, "a"); ASSERT_EQ(update_event->rows.size(), 2); ASSERT_EQ(update_event->rows[0].getType(), Field::Types::Tuple); - row_data = update_event->rows[0].get(); + row_data = update_event->rows[0].safeGet(); ASSERT_EQ(row_data.size(), 4u); - ASSERT_EQ(row_data[0].get(), 1u); - ASSERT_EQ(row_data[1].get(), 1u); - ASSERT_EQ(row_data[2].get(), 1u); - ASSERT_EQ(row_data[3].get(), 1u); - row_data = update_event->rows[1].get(); + ASSERT_EQ(row_data[0].safeGet(), 1u); + ASSERT_EQ(row_data[1].safeGet(), 1u); + ASSERT_EQ(row_data[2].safeGet(), 1u); + ASSERT_EQ(row_data[3].safeGet(), 1u); + row_data = update_event->rows[1].safeGet(); ASSERT_EQ(row_data.size(), 4u); - ASSERT_EQ(row_data[0].get(), 1u); - ASSERT_EQ(row_data[1].get(), 2u); - ASSERT_EQ(row_data[2].get(), 1u); - ASSERT_EQ(row_data[3].get(), 1u); + ASSERT_EQ(row_data[0].safeGet(), 1u); + ASSERT_EQ(row_data[1].safeGet(), 2u); + ASSERT_EQ(row_data[2].safeGet(), 1u); + ASSERT_EQ(row_data[3].safeGet(), 1u); ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); ++count; diff --git a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp index 943f3ae502e..b9fd9c325f8 100644 --- a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp +++ b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp @@ -196,7 +196,7 @@ PostgreSQLTableStructure::ColumnsInfoPtr readNamesAndTypesList( } else { - std::tuple row; + std::tuple row; while (stream >> row) { const auto column_name = std::get<0>(row); @@ -206,13 +206,14 @@ PostgreSQLTableStructure::ColumnsInfoPtr readNamesAndTypesList( std::get<3>(row)); columns.push_back(NameAndTypePair(column_name, data_type)); - auto attgenerated = std::get<6>(row); + auto attgenerated = std::get<7>(row); attributes.emplace( column_name, PostgreSQLTableStructure::PGAttribute{ .atttypid = parse(std::get<4>(row)), .atttypmod = parse(std::get<5>(row)), + .attnum = parse(std::get<6>(row)), .atthasdef = false, .attgenerated = attgenerated.empty() ? char{} : char(attgenerated[0]), .attr_def = {} @@ -308,6 +309,7 @@ PostgreSQLTableStructure fetchPostgreSQLTableStructure( "attndims AS dims, " /// array dimensions "atttypid as type_id, " "atttypmod as type_modifier, " + "attnum as att_num, " "attgenerated as generated " /// if column has GENERATED "FROM pg_attribute " "WHERE attrelid = (SELECT oid FROM pg_class WHERE {}) " @@ -338,17 +340,29 @@ PostgreSQLTableStructure fetchPostgreSQLTableStructure( "WHERE adrelid = (SELECT oid FROM pg_class WHERE {});", where); pqxx::result result{tx.exec(attrdef_query)}; - for (const auto row : result) + if (static_cast(result.size()) > table.physical_columns->names.size()) { - size_t adnum = row[0].as(); - if (!adnum || adnum > table.physical_columns->names.size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Received {} attrdef, but currently fetched columns list has {} columns", + result.size(), table.physical_columns->attributes.size()); + } + + for (const auto & column_attrs : table.physical_columns->attributes) + { + if (column_attrs.second.attgenerated != 's') /// e.g. not a generated column { - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Received adnum {}, but currently fetched columns list has {} columns", - adnum, table.physical_columns->attributes.size()); + continue; + } + + for (const auto row : result) + { + int adnum = row[0].as(); + if (column_attrs.second.attnum == adnum) + { + table.physical_columns->attributes.at(column_attrs.first).attr_def = row[1].as(); + break; + } } - const auto column_name = table.physical_columns->names[adnum - 1]; - table.physical_columns->attributes.at(column_name).attr_def = row[1].as(); } } diff --git a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.h b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.h index 81bf7b278fc..25ece6909fd 100644 --- a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.h +++ b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.h @@ -16,6 +16,7 @@ struct PostgreSQLTableStructure { Int32 atttypid; Int32 atttypmod; + Int32 attnum; bool atthasdef; char attgenerated; std::string attr_def; diff --git a/src/Databases/TablesLoader.h b/src/Databases/TablesLoader.h index 26b5777f1a9..bf469d83245 100644 --- a/src/Databases/TablesLoader.h +++ b/src/Databases/TablesLoader.h @@ -1,4 +1,5 @@ #pragma once + #include #include #include diff --git a/src/Databases/enableAllExperimentalSettings.cpp b/src/Databases/enableAllExperimentalSettings.cpp new file mode 100644 index 00000000000..9abe05d7bce --- /dev/null +++ b/src/Databases/enableAllExperimentalSettings.cpp @@ -0,0 +1,49 @@ +#include + +namespace DB +{ + +/* + * Enables all settings that allow the use of experimental, deprecated, or potentially unsafe features + * in a CREATE query. This function is used in DatabaseReplicated::recoverLostReplica() to create tables + * when the original settings used to create the table are not available. + */ + +void enableAllExperimentalSettings(ContextMutablePtr context) +{ + context->setSetting("allow_experimental_inverted_index", 1); + context->setSetting("allow_experimental_full_text_index", 1); + context->setSetting("allow_experimental_codecs", 1); + context->setSetting("allow_experimental_live_view", 1); + context->setSetting("allow_experimental_window_view", 1); + context->setSetting("allow_experimental_funnel_functions", 1); + context->setSetting("allow_experimental_nlp_functions", 1); + context->setSetting("allow_experimental_hash_functions", 1); + context->setSetting("allow_experimental_object_type", 1); + context->setSetting("allow_experimental_variant_type", 1); + context->setSetting("allow_experimental_dynamic_type", 1); + context->setSetting("allow_experimental_json_type", 1); + context->setSetting("allow_experimental_vector_similarity_index", 1); + context->setSetting("allow_experimental_bigint_types", 1); + context->setSetting("allow_experimental_window_functions", 1); + context->setSetting("allow_experimental_geo_types", 1); + context->setSetting("allow_experimental_map_type", 1); + context->setSetting("allow_deprecated_error_prone_window_functions", 1); + + context->setSetting("allow_suspicious_low_cardinality_types", 1); + context->setSetting("allow_suspicious_fixed_string_types", 1); + context->setSetting("allow_suspicious_indices", 1); + context->setSetting("allow_suspicious_codecs", 1); + context->setSetting("allow_hyperscan", 1); + context->setSetting("allow_simdjson", 1); + context->setSetting("allow_deprecated_syntax_for_merge_tree", 1); + context->setSetting("allow_suspicious_primary_key", 1); + context->setSetting("allow_suspicious_ttl_expressions", 1); + context->setSetting("allow_suspicious_variant_types", 1); + context->setSetting("enable_deflate_qpl_codec", 1); + context->setSetting("enable_zstd_qat_codec", 1); + context->setSetting("allow_create_index_without_type", 1); + context->setSetting("allow_experimental_s3queue", 1); +} + +} diff --git a/src/Databases/enableAllExperimentalSettings.h b/src/Databases/enableAllExperimentalSettings.h new file mode 100644 index 00000000000..ec3bfb98843 --- /dev/null +++ b/src/Databases/enableAllExperimentalSettings.h @@ -0,0 +1,15 @@ +#pragma once +#include + +namespace DB +{ + +/* + * Enables all settings that allow the use of experimental, deprecated, or potentially unsafe features + * in a CREATE query. This function is used in DatabaseReplicated::recoverLostReplica() to create tables + * when the original settings used to create the table are not available. + */ + +void enableAllExperimentalSettings(ContextMutablePtr context); + +} diff --git a/src/Dictionaries/CacheDictionaryStorage.h b/src/Dictionaries/CacheDictionaryStorage.h index 47f99bd1093..781822533e9 100644 --- a/src/Dictionaries/CacheDictionaryStorage.h +++ b/src/Dictionaries/CacheDictionaryStorage.h @@ -395,13 +395,13 @@ private: } else if constexpr (std::is_same_v) { - const String & string_value = column_value.get(); + const String & string_value = column_value.safeGet(); StringRef inserted_value = copyStringInArena(arena, string_value); container.back() = inserted_value; } else { - container.back() = static_cast(column_value.get()); + container.back() = static_cast(column_value.safeGet()); } }); } @@ -441,7 +441,7 @@ private: } else if constexpr (std::is_same_v) { - const String & string_value = column_value.get(); + const String & string_value = column_value.safeGet(); StringRef inserted_value = copyStringInArena(arena, string_value); if (!cell_was_default) @@ -454,7 +454,7 @@ private: } else { - container[index_to_use] = static_cast(column_value.get()); + container[index_to_use] = static_cast(column_value.safeGet()); } }); } @@ -651,12 +651,12 @@ private: } else if constexpr (std::is_same_v) { - auto & value = default_value.get(); + auto & value = default_value.safeGet(); value_setter(value); } else { - value_setter(default_value.get()); + value_setter(default_value.safeGet()); } } else diff --git a/src/Dictionaries/ClickHouseDictionarySource.cpp b/src/Dictionaries/ClickHouseDictionarySource.cpp index bf16f315ddf..b36d53a6159 100644 --- a/src/Dictionaries/ClickHouseDictionarySource.cpp +++ b/src/Dictionaries/ClickHouseDictionarySource.cpp @@ -51,6 +51,8 @@ namespace configuration.db, configuration.user, configuration.password, + configuration.proto_send_chunked, + configuration.proto_recv_chunked, configuration.quota_key, "", /* cluster */ "", /* cluster_secret */ @@ -222,7 +224,7 @@ void registerDictionarySourceClickHouse(DictionarySourceFactory & factory) { validateNamedCollection( *named_collection, {}, ValidateKeysMultiset{ - "secure", "host", "hostname", "port", "user", "username", "password", "quota_key", "name", + "secure", "host", "hostname", "port", "user", "username", "password", "proto_send_chunked", "proto_recv_chunked", "quota_key", "name", "db", "database", "table","query", "where", "invalidate_query", "update_field", "update_lag"}); const auto secure = named_collection->getOrDefault("secure", false); @@ -234,6 +236,8 @@ void registerDictionarySourceClickHouse(DictionarySourceFactory & factory) .host = host, .user = named_collection->getAnyOrDefault({"user", "username"}, "default"), .password = named_collection->getOrDefault("password", ""), + .proto_send_chunked = named_collection->getOrDefault("proto_send_chunked", "notchunked"), + .proto_recv_chunked = named_collection->getOrDefault("proto_recv_chunked", "notchunked"), .quota_key = named_collection->getOrDefault("quota_key", ""), .db = named_collection->getAnyOrDefault({"db", "database"}, default_database), .table = named_collection->getOrDefault("table", ""), @@ -258,6 +262,8 @@ void registerDictionarySourceClickHouse(DictionarySourceFactory & factory) .host = host, .user = config.getString(settings_config_prefix + ".user", "default"), .password = config.getString(settings_config_prefix + ".password", ""), + .proto_send_chunked = config.getString(settings_config_prefix + ".proto_caps.send", "notchunked"), + .proto_recv_chunked = config.getString(settings_config_prefix + ".proto_caps.recv", "notchunked"), .quota_key = config.getString(settings_config_prefix + ".quota_key", ""), .db = config.getString(settings_config_prefix + ".db", default_database), .table = config.getString(settings_config_prefix + ".table", ""), diff --git a/src/Dictionaries/ClickHouseDictionarySource.h b/src/Dictionaries/ClickHouseDictionarySource.h index 3357514eab2..faf9e5f8009 100644 --- a/src/Dictionaries/ClickHouseDictionarySource.h +++ b/src/Dictionaries/ClickHouseDictionarySource.h @@ -23,6 +23,8 @@ public: const std::string host; const std::string user; const std::string password; + const std::string proto_send_chunked; + const std::string proto_recv_chunked; const std::string quota_key; const std::string db; const std::string table; diff --git a/src/Dictionaries/DictionaryHelpers.h b/src/Dictionaries/DictionaryHelpers.h index 64fc05e99ab..43fd39640c3 100644 --- a/src/Dictionaries/DictionaryHelpers.h +++ b/src/Dictionaries/DictionaryHelpers.h @@ -345,7 +345,7 @@ public: if (attribute_default_value.isNull()) default_value_is_null = true; else - default_value = static_cast(attribute_default_value.get()); + default_value = static_cast(attribute_default_value.safeGet()); } else { @@ -377,7 +377,7 @@ public: if constexpr (std::is_same_v) { Field field = (*default_values_column)[row]; - return field.get(); + return field.safeGet(); } else if constexpr (std::is_same_v) return default_values_column->getDataAt(row); diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp index 999160226d9..b0233766741 100644 --- a/src/Dictionaries/FlatDictionary.cpp +++ b/src/Dictionaries/FlatDictionary.cpp @@ -245,7 +245,7 @@ ColumnPtr FlatDictionary::getHierarchy(ColumnPtr key_column, const DataTypePtr & std::optional null_value; if (!dictionary_attribute.null_value.isNull()) - null_value = dictionary_attribute.null_value.get(); + null_value = dictionary_attribute.null_value.safeGet(); const ContainerType & parent_keys = std::get>(hierarchical_attribute.container); @@ -300,7 +300,7 @@ ColumnUInt8::Ptr FlatDictionary::isInHierarchy( std::optional null_value; if (!dictionary_attribute.null_value.isNull()) - null_value = dictionary_attribute.null_value.get(); + null_value = dictionary_attribute.null_value.safeGet(); const ContainerType & parent_keys = std::get>(hierarchical_attribute.container); @@ -701,7 +701,7 @@ void FlatDictionary::setAttributeValue(Attribute & attribute, const UInt64 key, return; } - auto & attribute_value = value.get(); + auto & attribute_value = value.safeGet(); auto & container = std::get>(attribute.container); loaded_keys[key] = true; diff --git a/src/Dictionaries/HTTPDictionarySource.cpp b/src/Dictionaries/HTTPDictionarySource.cpp index 663c63dd6c6..bf19f912723 100644 --- a/src/Dictionaries/HTTPDictionarySource.cpp +++ b/src/Dictionaries/HTTPDictionarySource.cpp @@ -8,12 +8,12 @@ #include #include #include -#include #include #include #include "DictionarySourceFactory.h" #include "DictionarySourceHelpers.h" #include "DictionaryStructure.h" +#include #include "registerDictionaries.h" @@ -223,21 +223,23 @@ void registerDictionarySourceHTTP(DictionarySourceFactory & factory) String endpoint; String format; - auto named_collection = created_from_ddl - ? getURLBasedDataSourceConfiguration(config, settings_config_prefix, global_context) - : std::nullopt; + auto named_collection = created_from_ddl ? tryGetNamedCollectionWithOverrides(config, settings_config_prefix, global_context) : nullptr; if (named_collection) { - url = named_collection->configuration.url; - endpoint = named_collection->configuration.endpoint; - format = named_collection->configuration.format; + validateNamedCollection( + *named_collection, + /* required_keys */{}, + /* optional_keys */ValidateKeysMultiset{ + "url", "endpoint", "user", "credentials.user", "password", "credentials.password", "format", "compression_method", "structure", "name"}); - credentials.setUsername(named_collection->configuration.user); - credentials.setPassword(named_collection->configuration.password); + url = named_collection->getOrDefault("url", ""); + endpoint = named_collection->getOrDefault("endpoint", ""); + format = named_collection->getOrDefault("format", ""); - header_entries.reserve(named_collection->configuration.headers.size()); - for (const auto & [key, value] : named_collection->configuration.headers) - header_entries.emplace_back(key, value); + credentials.setUsername(named_collection->getAnyOrDefault({"user", "credentials.user"}, "")); + credentials.setPassword(named_collection->getAnyOrDefault({"password", "credentials.password"}, "")); + + header_entries = getHeadersFromNamedCollection(*named_collection); } else { diff --git a/src/Dictionaries/HashedArrayDictionary.cpp b/src/Dictionaries/HashedArrayDictionary.cpp index d7d50dfb0a6..8768be8e5ec 100644 --- a/src/Dictionaries/HashedArrayDictionary.cpp +++ b/src/Dictionaries/HashedArrayDictionary.cpp @@ -240,7 +240,7 @@ ColumnPtr HashedArrayDictionary::getHierarchy(Colu std::optional null_value; if (!dictionary_attribute.null_value.isNull()) - null_value = dictionary_attribute.null_value.get(); + null_value = dictionary_attribute.null_value.safeGet(); auto is_key_valid_func = [&, this](auto & key) @@ -313,7 +313,7 @@ ColumnUInt8::Ptr HashedArrayDictionary::isInHierar std::optional null_value; if (!dictionary_attribute.null_value.isNull()) - null_value = dictionary_attribute.null_value.get(); + null_value = dictionary_attribute.null_value.safeGet(); auto is_key_valid_func = [&](auto & key) @@ -581,13 +581,13 @@ void HashedArrayDictionary::blockToAttributes(cons if constexpr (std::is_same_v) { - String & value_to_insert = column_value_to_insert.get(); + String & value_to_insert = column_value_to_insert.safeGet(); StringRef string_in_arena_reference = copyStringInArena(*string_arenas[shard], value_to_insert); attribute_container.back() = string_in_arena_reference; } else { - auto value_to_insert = static_cast(column_value_to_insert.get()); + auto value_to_insert = static_cast(column_value_to_insert.safeGet()); attribute_container.back() = value_to_insert; } }; diff --git a/src/Dictionaries/HashedDictionary.h b/src/Dictionaries/HashedDictionary.h index 3a2b61e5149..7e935fe4855 100644 --- a/src/Dictionaries/HashedDictionary.h +++ b/src/Dictionaries/HashedDictionary.h @@ -636,7 +636,7 @@ ColumnPtr HashedDictionary::getHierarchy(C std::optional null_value; if (!dictionary_attribute.null_value.isNull()) - null_value = dictionary_attribute.null_value.get(); + null_value = dictionary_attribute.null_value.safeGet(); const CollectionsHolder & child_key_to_parent_key_maps = std::get>(hierarchical_attribute.containers); @@ -710,7 +710,7 @@ ColumnUInt8::Ptr HashedDictionary::isInHie std::optional null_value; if (!dictionary_attribute.null_value.isNull()) - null_value = dictionary_attribute.null_value.get(); + null_value = dictionary_attribute.null_value.safeGet(); const CollectionsHolder & child_key_to_parent_key_maps = std::get>(hierarchical_attribute.containers); @@ -1004,13 +1004,13 @@ void HashedDictionary::blockToAttributes(c if constexpr (std::is_same_v) { - String & value_to_insert = column_value_to_insert.get(); + String & value_to_insert = column_value_to_insert.safeGet(); StringRef arena_value = copyStringInArena(*string_arenas[shard], value_to_insert); container.insert({key, arena_value}); } else { - auto value_to_insert = static_cast(column_value_to_insert.get()); + auto value_to_insert = static_cast(column_value_to_insert.safeGet()); container.insert({key, value_to_insert}); } diff --git a/src/Dictionaries/HierarchyDictionariesUtils.cpp b/src/Dictionaries/HierarchyDictionariesUtils.cpp index e1119982a34..de532ade26d 100644 --- a/src/Dictionaries/HierarchyDictionariesUtils.cpp +++ b/src/Dictionaries/HierarchyDictionariesUtils.cpp @@ -50,7 +50,7 @@ namespace std::optional null_value; if (!hierarchical_attribute.null_value.isNull()) - null_value = hierarchical_attribute.null_value.get(); + null_value = hierarchical_attribute.null_value.safeGet(); ColumnPtr key_to_request_column = ColumnVector::create(); auto * key_to_request_column_typed = static_cast *>(key_to_request_column->assumeMutable().get()); @@ -190,7 +190,7 @@ ColumnPtr getKeysHierarchyDefaultImplementation( std::optional null_value; if (!hierarchical_attribute.null_value.isNull()) - null_value = hierarchical_attribute.null_value.get(); + null_value = hierarchical_attribute.null_value.safeGet(); auto get_parent_key_func = [&](auto & key) { @@ -252,7 +252,7 @@ ColumnUInt8::Ptr getKeysIsInHierarchyDefaultImplementation( std::optional null_value; if (!hierarchical_attribute.null_value.isNull()) - null_value = hierarchical_attribute.null_value.get(); + null_value = hierarchical_attribute.null_value.safeGet(); auto get_parent_key_func = [&](auto & key) { diff --git a/src/Dictionaries/IPAddressDictionary.cpp b/src/Dictionaries/IPAddressDictionary.cpp index 41fafcc162b..4f9e991752f 100644 --- a/src/Dictionaries/IPAddressDictionary.cpp +++ b/src/Dictionaries/IPAddressDictionary.cpp @@ -613,14 +613,14 @@ void IPAddressDictionary::calculateBytesAllocated() template void IPAddressDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value) { - attribute.null_values = null_value.isNull() ? T{} : T(null_value.get()); + attribute.null_values = null_value.isNull() ? T{} : T(null_value.safeGet()); attribute.maps.emplace>(); } template <> void IPAddressDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value) { - attribute.null_values = null_value.isNull() ? String() : null_value.get(); + attribute.null_values = null_value.isNull() ? String() : null_value.safeGet(); attribute.maps.emplace>(); attribute.string_arena = std::make_unique(); } @@ -976,13 +976,13 @@ void IPAddressDictionary::setAttributeValue(Attribute & attribute, const Field & if constexpr (std::is_same_v) { - const auto & string = value.get(); + const auto & string = value.safeGet(); const auto * string_in_arena = attribute.string_arena->insert(string.data(), string.size()); setAttributeValueImpl(attribute, StringRef{string_in_arena, string.size()}); } else { - setAttributeValueImpl(attribute, static_cast(value.get())); + setAttributeValueImpl(attribute, static_cast(value.safeGet())); } }; diff --git a/src/Dictionaries/MongoDBDictionarySource.cpp b/src/Dictionaries/MongoDBDictionarySource.cpp index 46910fa9f6a..7bacfdab3d2 100644 --- a/src/Dictionaries/MongoDBDictionarySource.cpp +++ b/src/Dictionaries/MongoDBDictionarySource.cpp @@ -1,16 +1,12 @@ #include "MongoDBDictionarySource.h" #include "DictionarySourceFactory.h" #include "DictionaryStructure.h" -#include "registerDictionaries.h" -#include #include +#include namespace DB { -static const std::unordered_set dictionary_allowed_keys = { - "host", "port", "user", "password", "db", "database", "uri", "collection", "name", "method", "options"}; - void registerDictionarySourceMongoDB(DictionarySourceFactory & factory) { auto create_mongo_db_dictionary = []( @@ -23,35 +19,53 @@ void registerDictionarySourceMongoDB(DictionarySourceFactory & factory) bool created_from_ddl) { const auto config_prefix = root_config_prefix + ".mongodb"; - ExternalDataSourceConfiguration configuration; - auto has_config_key = [](const String & key) { return dictionary_allowed_keys.contains(key); }; - auto named_collection = getExternalDataSourceConfiguration(config, config_prefix, context, has_config_key); + auto named_collection = created_from_ddl ? tryGetNamedCollectionWithOverrides(config, config_prefix, context) : nullptr; + + String host, username, password, database, method, options, collection; + UInt16 port; if (named_collection) { - configuration = named_collection->configuration; + validateNamedCollection( + *named_collection, + /* required_keys */{"collection"}, + /* optional_keys */ValidateKeysMultiset{ + "host", "port", "user", "password", "db", "database", "uri", "name", "method", "options"}); + + host = named_collection->getOrDefault("host", ""); + port = static_cast(named_collection->getOrDefault("port", 0)); + username = named_collection->getOrDefault("user", ""); + password = named_collection->getOrDefault("password", ""); + database = named_collection->getAnyOrDefault({"db", "database"}, ""); + method = named_collection->getOrDefault("method", ""); + collection = named_collection->getOrDefault("collection", ""); + options = named_collection->getOrDefault("options", ""); } else { - configuration.host = config.getString(config_prefix + ".host", ""); - configuration.port = config.getUInt(config_prefix + ".port", 0); - configuration.username = config.getString(config_prefix + ".user", ""); - configuration.password = config.getString(config_prefix + ".password", ""); - configuration.database = config.getString(config_prefix + ".db", ""); + host = config.getString(config_prefix + ".host", ""); + port = config.getUInt(config_prefix + ".port", 0); + username = config.getString(config_prefix + ".user", ""); + password = config.getString(config_prefix + ".password", ""); + database = config.getString(config_prefix + ".db", ""); + method = config.getString(config_prefix + ".method", ""); + collection = config.getString(config_prefix + ".collection"); + options = config.getString(config_prefix + ".options", ""); } if (created_from_ddl) - context->getRemoteHostFilter().checkHostAndPort(configuration.host, toString(configuration.port)); + context->getRemoteHostFilter().checkHostAndPort(host, toString(port)); - return std::make_unique(dict_struct, + return std::make_unique( + dict_struct, config.getString(config_prefix + ".uri", ""), - configuration.host, - configuration.port, - configuration.username, - configuration.password, - config.getString(config_prefix + ".method", ""), - configuration.database, - config.getString(config_prefix + ".collection"), - config.getString(config_prefix + ".options", ""), + host, + port, + username, + password, + method, + database, + collection, + options, sample_block); }; @@ -233,7 +247,7 @@ QueryPipeline MongoDBDictionarySource::loadKeys(const Columns & key_columns, con } case AttributeUnderlyingType::String: { - String loaded_str((*key_columns[attribute_index])[row_idx].get()); + String loaded_str((*key_columns[attribute_index])[row_idx].safeGet()); /// Convert string to ObjectID if (key_attribute.is_object_id) { diff --git a/src/Dictionaries/PolygonDictionary.cpp b/src/Dictionaries/PolygonDictionary.cpp index dfc920623e3..ff29ca1f6b8 100644 --- a/src/Dictionaries/PolygonDictionary.cpp +++ b/src/Dictionaries/PolygonDictionary.cpp @@ -141,7 +141,7 @@ ColumnPtr IPolygonDictionary::getColumn( { getItemsShortCircuitImpl( requested_key_points, - [&](size_t row) { return (*attribute_values_column)[row].get(); }, + [&](size_t row) { return (*attribute_values_column)[row].safeGet(); }, [&](Array & value) { result_column_typed.insert(value); }, default_mask.value()); } @@ -149,7 +149,7 @@ ColumnPtr IPolygonDictionary::getColumn( { getItemsImpl( requested_key_points, - [&](size_t row) { return (*attribute_values_column)[row].get(); }, + [&](size_t row) { return (*attribute_values_column)[row].safeGet(); }, [&](Array & value) { result_column_typed.insert(value); }, default_value_provider.value()); } @@ -432,16 +432,16 @@ void IPolygonDictionary::getItemsImpl( } else if constexpr (std::is_same_v) { - set_value(default_value.get()); + set_value(default_value.safeGet()); } else if constexpr (std::is_same_v) { - auto default_value_string = default_value.get(); + auto default_value_string = default_value.safeGet(); set_value(default_value_string); } else { - set_value(default_value.get>()); + set_value(default_value.safeGet>()); } } } diff --git a/src/Dictionaries/PostgreSQLDictionarySource.cpp b/src/Dictionaries/PostgreSQLDictionarySource.cpp index f62a9a009d8..b1bab17e2e9 100644 --- a/src/Dictionaries/PostgreSQLDictionarySource.cpp +++ b/src/Dictionaries/PostgreSQLDictionarySource.cpp @@ -4,6 +4,7 @@ #include #include #include "DictionarySourceFactory.h" +#include #include "registerDictionaries.h" #if USE_LIBPQXX @@ -13,7 +14,6 @@ #include "readInvalidateQuery.h" #include #include -#include #include #endif @@ -24,16 +24,17 @@ namespace DB namespace ErrorCodes { extern const int SUPPORT_IS_DISABLED; + extern const int BAD_ARGUMENTS; } +static const ValidateKeysMultiset dictionary_allowed_keys = { + "host", "port", "user", "password", "db", "database", "table", "schema", + "update_field", "update_lag", "invalidate_query", "query", "where", "name", "priority"}; + #if USE_LIBPQXX static const UInt64 max_block_size = 8192; -static const std::unordered_set dictionary_allowed_keys = { - "host", "port", "user", "password", "db", "database", "table", "schema", - "update_field", "update_lag", "invalidate_query", "query", "where", "name", "priority"}; - namespace { ExternalQueryBuilder makeExternalQueryBuilder(const DictionaryStructure & dict_struct, const String & schema, const String & table, const String & query, const String & where) @@ -177,6 +178,19 @@ std::string PostgreSQLDictionarySource::toString() const return "PostgreSQL: " + configuration.db + '.' + configuration.table + (where.empty() ? "" : ", where: " + where); } +static void validateConfigKeys( + const Poco::Util::AbstractConfiguration & dict_config, const String & config_prefix) +{ + Poco::Util::AbstractConfiguration::Keys config_keys; + dict_config.keys(config_prefix, config_keys); + for (const auto & config_key : config_keys) + { + if (dictionary_allowed_keys.contains(config_key) || startsWith(config_key, "replica")) + continue; + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected key `{}` in dictionary source configuration", config_key); + } +} + #endif void registerDictionarySourcePostgreSQL(DictionarySourceFactory & factory) @@ -191,38 +205,117 @@ void registerDictionarySourcePostgreSQL(DictionarySourceFactory & factory) { #if USE_LIBPQXX const auto settings_config_prefix = config_prefix + ".postgresql"; - auto has_config_key = [](const String & key) { return dictionary_allowed_keys.contains(key) || key.starts_with("replica"); }; - auto configuration = getExternalDataSourceConfigurationByPriority(config, settings_config_prefix, context, has_config_key); const auto & settings = context->getSettingsRef(); + std::optional dictionary_configuration; + postgres::PoolWithFailover::ReplicasConfigurationByPriority replicas_by_priority; + + auto named_collection = created_from_ddl ? tryGetNamedCollectionWithOverrides(config, settings_config_prefix, context) : nullptr; + if (named_collection) + { + validateNamedCollection>(*named_collection, {}, dictionary_allowed_keys); + + StoragePostgreSQL::Configuration common_configuration; + common_configuration.host = named_collection->getOrDefault("host", ""); + common_configuration.port = named_collection->getOrDefault("port", 0); + common_configuration.username = named_collection->getOrDefault("user", ""); + common_configuration.password = named_collection->getOrDefault("password", ""); + common_configuration.database = named_collection->getAnyOrDefault({"database", "db"}, ""); + common_configuration.schema = named_collection->getOrDefault("schema", ""); + common_configuration.table = named_collection->getOrDefault("table", ""); + + dictionary_configuration.emplace(PostgreSQLDictionarySource::Configuration{ + .db = common_configuration.database, + .schema = common_configuration.schema, + .table = common_configuration.table, + .query = named_collection->getOrDefault("query", ""), + .where = named_collection->getOrDefault("where", ""), + .invalidate_query = named_collection->getOrDefault("invalidate_query", ""), + .update_field = named_collection->getOrDefault("update_field", ""), + .update_lag = named_collection->getOrDefault("update_lag", 1), + }); + + replicas_by_priority[0].emplace_back(common_configuration); + } + else + { + validateConfigKeys(config, settings_config_prefix); + + StoragePostgreSQL::Configuration common_configuration; + common_configuration.host = config.getString(settings_config_prefix + ".host", ""); + common_configuration.port = config.getUInt(settings_config_prefix + ".port", 0); + common_configuration.username = config.getString(settings_config_prefix + ".user", ""); + common_configuration.password = config.getString(settings_config_prefix + ".password", ""); + common_configuration.database = config.getString(fmt::format("{}.database", settings_config_prefix), config.getString(fmt::format("{}.db", settings_config_prefix), "")); + common_configuration.schema = config.getString(fmt::format("{}.schema", settings_config_prefix), ""); + common_configuration.table = config.getString(fmt::format("{}.table", settings_config_prefix), ""); + + dictionary_configuration.emplace(PostgreSQLDictionarySource::Configuration + { + .db = common_configuration.database, + .schema = common_configuration.schema, + .table = common_configuration.table, + .query = config.getString(fmt::format("{}.query", settings_config_prefix), ""), + .where = config.getString(fmt::format("{}.where", settings_config_prefix), ""), + .invalidate_query = config.getString(fmt::format("{}.invalidate_query", settings_config_prefix), ""), + .update_field = config.getString(fmt::format("{}.update_field", settings_config_prefix), ""), + .update_lag = config.getUInt64(fmt::format("{}.update_lag", settings_config_prefix), 1) + }); + + + if (config.has(settings_config_prefix + ".replica")) + { + Poco::Util::AbstractConfiguration::Keys config_keys; + config.keys(settings_config_prefix, config_keys); + + for (const auto & config_key : config_keys) + { + if (config_key.starts_with("replica")) + { + String replica_name = settings_config_prefix + "." + config_key; + StoragePostgreSQL::Configuration replica_configuration{common_configuration}; + + size_t priority = config.getInt(replica_name + ".priority", 0); + replica_configuration.host = config.getString(replica_name + ".host", common_configuration.host); + replica_configuration.port = config.getUInt(replica_name + ".port", common_configuration.port); + replica_configuration.username = config.getString(replica_name + ".user", common_configuration.username); + replica_configuration.password = config.getString(replica_name + ".password", common_configuration.password); + + if (replica_configuration.host.empty() || replica_configuration.port == 0 + || replica_configuration.username.empty() || replica_configuration.password.empty()) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Named collection of connection parameters is missing some " + "of the parameters and no other dictionary parameters are added"); + } + + replicas_by_priority[priority].emplace_back(replica_configuration); + } + } + } + else + { + replicas_by_priority[0].emplace_back(common_configuration); + } + } if (created_from_ddl) { - for (const auto & replicas : configuration.replicas_configurations) - for (const auto & replica : replicas.second) + for (const auto & [_, replicas] : replicas_by_priority) + for (const auto & replica : replicas) context->getRemoteHostFilter().checkHostAndPort(replica.host, toString(replica.port)); } + auto pool = std::make_shared( - configuration.replicas_configurations, + replicas_by_priority, settings.postgresql_connection_pool_size, settings.postgresql_connection_pool_wait_timeout, settings.postgresql_connection_pool_retries, settings.postgresql_connection_pool_auto_close_connection, settings.postgresql_connection_attempt_timeout); - PostgreSQLDictionarySource::Configuration dictionary_configuration - { - .db = configuration.database, - .schema = configuration.schema, - .table = configuration.table, - .query = config.getString(fmt::format("{}.query", settings_config_prefix), ""), - .where = config.getString(fmt::format("{}.where", settings_config_prefix), ""), - .invalidate_query = config.getString(fmt::format("{}.invalidate_query", settings_config_prefix), ""), - .update_field = config.getString(fmt::format("{}.update_field", settings_config_prefix), ""), - .update_lag = config.getUInt64(fmt::format("{}.update_lag", settings_config_prefix), 1) - }; - return std::make_unique(dict_struct, dictionary_configuration, pool, sample_block); + return std::make_unique(dict_struct, dictionary_configuration.value(), pool, sample_block); #else (void)dict_struct; (void)config; diff --git a/src/Dictionaries/RangeHashedDictionary.h b/src/Dictionaries/RangeHashedDictionary.h index fc6c98990d0..c264b480bcb 100644 --- a/src/Dictionaries/RangeHashedDictionary.h +++ b/src/Dictionaries/RangeHashedDictionary.h @@ -906,13 +906,13 @@ void RangeHashedDictionary::setAttributeValue(Attribute & a if constexpr (std::is_same_v) { - const auto & string = value.get(); + const auto & string = value.safeGet(); StringRef string_ref = copyStringInArena(string_arena, string); value_to_insert = string_ref; } else { - value_to_insert = static_cast(value.get()); + value_to_insert = static_cast(value.safeGet()); } container.back() = value_to_insert; diff --git a/src/Dictionaries/RedisDictionarySource.cpp b/src/Dictionaries/RedisDictionarySource.cpp index 1736cdff306..9db639a0ca4 100644 --- a/src/Dictionaries/RedisDictionarySource.cpp +++ b/src/Dictionaries/RedisDictionarySource.cpp @@ -1,7 +1,6 @@ #include "RedisDictionarySource.h" #include "DictionarySourceFactory.h" #include "DictionaryStructure.h" -#include "registerDictionaries.h" #include #include @@ -160,7 +159,7 @@ namespace DB if (isInteger(type)) key << DB::toString(key_columns[i]->get64(row)); else if (isString(type)) - key << (*key_columns[i])[row].get(); + key << (*key_columns[i])[row].safeGet(); else throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected type of key in Redis dictionary"); } diff --git a/src/Disks/DiskFomAST.cpp b/src/Disks/DiskFomAST.cpp new file mode 100644 index 00000000000..5329ff8748a --- /dev/null +++ b/src/Disks/DiskFomAST.cpp @@ -0,0 +1,150 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +std::string getOrCreateCustomDisk(DiskConfigurationPtr config, const std::string & serialization, ContextPtr context, bool attach) +{ + Poco::Util::AbstractConfiguration::Keys disk_settings_keys; + config->keys(disk_settings_keys); + /// Check that no settings are defined when disk from the config is referred. + if (disk_settings_keys.empty()) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Disk function must have arguments. Invalid disk description."); + + if (disk_settings_keys.size() == 1 && disk_settings_keys.front() == "name" && !attach) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Disk function `{}` must have other arguments apart from `name`, which describe disk configuration. Invalid disk description.", + serialization); + + auto disk_settings_hash = sipHash128(serialization.data(), serialization.size()); + + std::string disk_name; + if (config->has("name")) + { + disk_name = config->getString("name"); + } + else + { + /// We need a unique name for a created custom disk, but it needs to be the same + /// after table is reattached or server is restarted, so take a hash of the disk + /// configuration serialized ast as a disk name suffix. + disk_name = DiskSelector::TMP_INTERNAL_DISK_PREFIX + toString(disk_settings_hash); + } + + + auto disk = context->getOrCreateDisk(disk_name, [&](const DisksMap & disks_map) -> DiskPtr { + auto result = DiskFactory::instance().create( + disk_name, *config, /* config_path */"", context, disks_map, /* attach */attach, /* custom_disk */true); + /// Mark that disk can be used without storage policy. + result->markDiskAsCustom(disk_settings_hash); + return result; + }); + + if (!disk->isCustomDisk()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Disk `{}` already exists and is described by the config." + " It is impossible to redefine it.", + disk_name); + + if (disk->getCustomDiskSettings() != disk_settings_hash && !attach) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "The disk `{}` is already configured as a custom disk in another table. It can't be redefined with different settings.", + disk_name); + + if (!attach && !disk->isRemote()) + { + static constexpr auto custom_local_disks_base_dir_in_config = "custom_local_disks_base_directory"; + auto disk_path_expected_prefix = context->getConfigRef().getString(custom_local_disks_base_dir_in_config, ""); + + if (disk_path_expected_prefix.empty()) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Base path for custom local disks must be defined in config file by `{}`", + custom_local_disks_base_dir_in_config); + + if (!pathStartsWith(disk->getPath(), disk_path_expected_prefix)) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Path of the custom local disk must be inside `{}` directory", + disk_path_expected_prefix); + } + + return disk_name; +} + +class DiskConfigurationFlattener +{ +public: + struct Data + { + ContextPtr context; + bool attach; + }; + + static bool needChildVisit(const ASTPtr &, const ASTPtr &) { return true; } + + static void visit(ASTPtr & ast, Data & data) + { + if (isDiskFunction(ast)) + { + const auto * function = ast->as(); + const auto * function_args_expr = assert_cast(function->arguments.get()); + const auto & function_args = function_args_expr->children; + auto config = getDiskConfigurationFromAST(function_args, data.context); + auto disk_setting_string = serializeAST(*function); + auto disk_name = getOrCreateCustomDisk(config, disk_setting_string, data.context, data.attach); + ast = std::make_shared(disk_name); + } + } +}; + + +std::string DiskFomAST::createCustomDisk(const ASTPtr & disk_function_ast, ContextPtr context, bool attach) +{ + if (!isDiskFunction(disk_function_ast)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected a disk function"); + + auto ast = disk_function_ast->clone(); + + using FlattenDiskConfigurationVisitor = InDepthNodeVisitor; + FlattenDiskConfigurationVisitor::Data data{context, attach}; + FlattenDiskConfigurationVisitor{data}.visit(ast); + + return assert_cast(*ast).value.safeGet(); +} + +void DiskFomAST::ensureDiskIsNotCustom(const std::string & disk_name, ContextPtr context) +{ + auto disk = context->getDisk(disk_name); + + if (disk->isCustomDisk()) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Disk name `{}` is a custom disk that is used in other table. " + "That disk could not be used by a reference by other tables. The custom disk should be fully specified with a disk function.", + disk_name); +} + +} diff --git a/src/Disks/DiskFomAST.h b/src/Disks/DiskFomAST.h new file mode 100644 index 00000000000..0a30834533e --- /dev/null +++ b/src/Disks/DiskFomAST.h @@ -0,0 +1,15 @@ +#pragma once +#include +#include +#include + +namespace DB +{ + +namespace DiskFomAST +{ + void ensureDiskIsNotCustom(const std::string & name, ContextPtr context); + std::string createCustomDisk(const ASTPtr & disk_function, ContextPtr context, bool attach); +} + +} diff --git a/src/Disks/DiskSelector.h b/src/Disks/DiskSelector.h index 49a1be5cf50..e6e2c257911 100644 --- a/src/Disks/DiskSelector.h +++ b/src/Disks/DiskSelector.h @@ -6,6 +6,8 @@ #include #include +#include +#include namespace DB { diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index 412ad27e94f..78d5f37e3a7 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -464,9 +464,9 @@ public: virtual void chmod(const String & /*path*/, mode_t /*mode*/) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Disk does not support chmod"); } /// Was disk created to be used without storage configuration? - bool isCustomDisk() const { return is_custom_disk; } - - void markDiskAsCustom() { is_custom_disk = true; } + bool isCustomDisk() const { return custom_disk_settings_hash != 0; } + UInt128 getCustomDiskSettings() const { return custom_disk_settings_hash; } + void markDiskAsCustom(UInt128 settings_hash) { custom_disk_settings_hash = settings_hash; } virtual DiskPtr getDelegateDiskIfExists() const { return nullptr; } @@ -504,7 +504,8 @@ protected: private: ThreadPool copying_thread_pool; - bool is_custom_disk = false; + // 0 means the disk is not custom, the disk is predefined in the config + UInt128 custom_disk_settings_hash = 0; /// Check access to the disk. void checkAccess(); diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp index b471f3fc58f..56bfa019819 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp @@ -645,8 +645,9 @@ void CachedOnDiskReadBufferFromFile::predownload(FileSegment & file_segment) ProfileEvents::increment(ProfileEvents::CachedReadBufferReadFromSourceBytes, current_impl_buffer_size); + std::string failure_reason; bool continue_predownload = file_segment.reserve( - current_predownload_size, settings.filesystem_cache_reserve_space_wait_lock_timeout_milliseconds); + current_predownload_size, settings.filesystem_cache_reserve_space_wait_lock_timeout_milliseconds, failure_reason); if (continue_predownload) { LOG_TEST(log, "Left to predownload: {}, buffer size: {}", bytes_to_predownload, current_impl_buffer_size); @@ -1002,7 +1003,8 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep() { chassert(file_offset_of_buffer_end + size - 1 <= file_segment.range().right); - bool success = file_segment.reserve(size, settings.filesystem_cache_reserve_space_wait_lock_timeout_milliseconds); + std::string failure_reason; + bool success = file_segment.reserve(size, settings.filesystem_cache_reserve_space_wait_lock_timeout_milliseconds, failure_reason); if (success) { chassert(file_segment.getCurrentWriteOffset() == static_cast(implementation_buffer->getPosition())); @@ -1028,7 +1030,8 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep() LOG_TRACE(log, "Bypassing cache because writeCache method failed"); } else - LOG_TRACE(log, "No space left in cache to reserve {} bytes, will continue without cache download", size); + LOG_TRACE(log, "No space left in cache to reserve {} bytes, reason: {}, " + "will continue without cache download", size, failure_reason); if (!success) { diff --git a/src/Disks/IO/CachedOnDiskWriteBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskWriteBufferFromFile.cpp index 382c4a80cc4..103ae0e1832 100644 --- a/src/Disks/IO/CachedOnDiskWriteBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskWriteBufferFromFile.cpp @@ -91,7 +91,8 @@ bool FileSegmentRangeWriter::write(char * data, size_t size, size_t offset, File size_t size_to_write = std::min(available_size, size); - bool reserved = file_segment->reserve(size_to_write, reserve_space_lock_wait_timeout_milliseconds); + std::string failure_reason; + bool reserved = file_segment->reserve(size_to_write, reserve_space_lock_wait_timeout_milliseconds, failure_reason); if (!reserved) { appendFilesystemCacheLog(*file_segment); diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index bb9761a3905..c96f5f0c931 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -80,20 +80,27 @@ SeekableReadBufferPtr ReadBufferFromRemoteFSGather::createImplementationBuffer(c if (with_file_cache) { - auto cache_key = settings.remote_fs_cache->createKeyForPath(object_path); - buf = std::make_unique( - object_path, - cache_key, - settings.remote_fs_cache, - FileCache::getCommonUser(), - [=, this]() { return read_buffer_creator(/* restricted_seek */true, object); }, - settings, - query_id, - object.bytes_size, - /* allow_seeks */false, - /* use_external_buffer */true, - /* read_until_position */std::nullopt, - cache_log); + if (settings.remote_fs_cache->isInitialized()) + { + auto cache_key = settings.remote_fs_cache->createKeyForPath(object_path); + buf = std::make_unique( + object_path, + cache_key, + settings.remote_fs_cache, + FileCache::getCommonUser(), + [=, this]() { return read_buffer_creator(/* restricted_seek */true, object); }, + settings, + query_id, + object.bytes_size, + /* allow_seeks */false, + /* use_external_buffer */true, + /* read_until_position */std::nullopt, + cache_log); + } + else + { + settings.remote_fs_cache->throwInitExceptionIfNeeded(); + } } /// Can't wrap CachedOnDiskReadBufferFromFile in CachedInMemoryReadBufferFromFile because the diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp index fb817005399..ab0d357119c 100644 --- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp @@ -99,7 +99,7 @@ std::unique_ptr CachedObjectStorage::writeObject( /// N /// Need to remove even if cache_on_write == false. removeCacheIfExists(object.remote_path); - if (cache_on_write) + if (cache_on_write && cache->isInitialized()) { auto key = getCacheKey(object.remote_path); return std::make_unique( @@ -122,7 +122,8 @@ void CachedObjectStorage::removeCacheIfExists(const std::string & path_key_for_c return; /// Add try catch? - cache->removeKeyIfExists(getCacheKey(path_key_for_cache), FileCache::getCommonUser().user_id); + if (cache->isInitialized()) + cache->removeKeyIfExists(getCacheKey(path_key_for_cache), FileCache::getCommonUser().user_id); } void CachedObjectStorage::removeObject(const StoredObject & object) diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 433a0e96d2e..8de80971238 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -63,7 +63,7 @@ void throwIfError(const Aws::Utils::Outcome & response) { const auto & err = response.GetError(); throw S3Exception( - fmt::format("{} (Code: {}, s3 exception: {})", + fmt::format("{} (Code: {}, S3 exception: '{}')", err.GetMessage(), static_cast(err.GetErrorType()), err.GetExceptionName()), err.GetErrorType()); } @@ -305,7 +305,8 @@ void S3ObjectStorage::listObjects(const std::string & path, RelativePathsWithMet S3::ListObjectsV2Request request; request.SetBucket(uri.bucket); - request.SetPrefix(path); + if (path != "/") + request.SetPrefix(path); if (max_keys) request.SetMaxKeys(static_cast(max_keys)); else diff --git a/src/Disks/StoragePolicy.h b/src/Disks/StoragePolicy.h index 501e033abc3..8e49ed910e3 100644 --- a/src/Disks/StoragePolicy.h +++ b/src/Disks/StoragePolicy.h @@ -12,7 +12,6 @@ #include #include -#include #include #include #include diff --git a/src/Disks/getOrCreateDiskFromAST.cpp b/src/Disks/getOrCreateDiskFromAST.cpp deleted file mode 100644 index fd43f31a009..00000000000 --- a/src/Disks/getOrCreateDiskFromAST.cpp +++ /dev/null @@ -1,121 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - -namespace -{ - std::string getOrCreateDiskFromDiskAST(const ASTFunction & function, ContextPtr context, bool attach) - { - const auto * function_args_expr = assert_cast(function.arguments.get()); - const auto & function_args = function_args_expr->children; - auto config = getDiskConfigurationFromAST(function_args, context); - - std::string disk_name; - if (config->has("name")) - { - disk_name = config->getString("name"); - } - else - { - /// We need a unique name for a created custom disk, but it needs to be the same - /// after table is reattached or server is restarted, so take a hash of the disk - /// configuration serialized ast as a disk name suffix. - auto disk_setting_string = serializeAST(function); - disk_name = DiskSelector::TMP_INTERNAL_DISK_PREFIX - + toString(sipHash128(disk_setting_string.data(), disk_setting_string.size())); - } - - auto result_disk = context->getOrCreateDisk(disk_name, [&](const DisksMap & disks_map) -> DiskPtr { - auto disk = DiskFactory::instance().create( - disk_name, *config, /* config_path */"", context, disks_map, /* attach */attach, /* custom_disk */true); - /// Mark that disk can be used without storage policy. - disk->markDiskAsCustom(); - return disk; - }); - - if (!result_disk->isCustomDisk()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Disk with name `{}` already exist", disk_name); - - if (!attach && !result_disk->isRemote()) - { - static constexpr auto custom_local_disks_base_dir_in_config = "custom_local_disks_base_directory"; - auto disk_path_expected_prefix = context->getConfigRef().getString(custom_local_disks_base_dir_in_config, ""); - - if (disk_path_expected_prefix.empty()) - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Base path for custom local disks must be defined in config file by `{}`", - custom_local_disks_base_dir_in_config); - - if (!pathStartsWith(result_disk->getPath(), disk_path_expected_prefix)) - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Path of the custom local disk must be inside `{}` directory", - disk_path_expected_prefix); - } - - return disk_name; - } - - class DiskConfigurationFlattener - { - public: - struct Data - { - ContextPtr context; - bool attach; - }; - - static bool needChildVisit(const ASTPtr &, const ASTPtr &) { return true; } - - static void visit(ASTPtr & ast, Data & data) - { - if (isDiskFunction(ast)) - { - auto disk_name = getOrCreateDiskFromDiskAST(*ast->as(), data.context, data.attach); - ast = std::make_shared(disk_name); - } - } - }; - - /// Visits children first. - using FlattenDiskConfigurationVisitor = InDepthNodeVisitor; -} - - -std::string getOrCreateDiskFromDiskAST(const ASTPtr & disk_function, ContextPtr context, bool attach) -{ - if (!isDiskFunction(disk_function)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected a disk function"); - - auto ast = disk_function->clone(); - - FlattenDiskConfigurationVisitor::Data data{context, attach}; - FlattenDiskConfigurationVisitor{data}.visit(ast); - - auto disk_name = assert_cast(*ast).value.get(); - LOG_TRACE(getLogger("getOrCreateDiskFromDiskAST"), "Result disk name: {}", disk_name); - return disk_name; -} - -} diff --git a/src/Disks/getOrCreateDiskFromAST.h b/src/Disks/getOrCreateDiskFromAST.h deleted file mode 100644 index 61e1decbee9..00000000000 --- a/src/Disks/getOrCreateDiskFromAST.h +++ /dev/null @@ -1,18 +0,0 @@ -#pragma once -#include -#include -#include - -namespace DB -{ - -class ASTFunction; - -/** - * Create a DiskPtr from disk AST function like disk(), - * add it to DiskSelector by a unique (but always the same for given configuration) disk name - * and return this name. - */ -std::string getOrCreateDiskFromDiskAST(const ASTPtr & disk_function, ContextPtr context, bool attach); - -} diff --git a/src/Formats/EscapingRuleUtils.cpp b/src/Formats/EscapingRuleUtils.cpp index 58407a810c5..5429d8b7e0d 100644 --- a/src/Formats/EscapingRuleUtils.cpp +++ b/src/Formats/EscapingRuleUtils.cpp @@ -419,10 +419,11 @@ String getAdditionalFormatInfoByEscapingRule(const FormatSettings & settings, Fo String result = getAdditionalFormatInfoForAllRowBasedFormats(settings); /// First, settings that are common for all text formats: result += fmt::format( - ", try_infer_integers={}, try_infer_dates={}, try_infer_datetimes={}", + ", try_infer_integers={}, try_infer_dates={}, try_infer_datetimes={}, try_infer_datetimes_only_datetime64={}", settings.try_infer_integers, settings.try_infer_dates, - settings.try_infer_datetimes); + settings.try_infer_datetimes, + settings.try_infer_datetimes_only_datetime64); /// Second, format-specific settings: switch (escaping_rule) @@ -463,7 +464,7 @@ String getAdditionalFormatInfoByEscapingRule(const FormatSettings & settings, Fo settings.json.read_arrays_as_strings, settings.json.try_infer_objects_as_tuples, settings.json.infer_incomplete_types_as_strings, - settings.json.allow_object_type, + settings.json.allow_deprecated_object_type, settings.json.use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects); break; default: diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index a78836ff63c..f1214aac7dc 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -146,11 +146,13 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.json.validate_types_from_metadata = settings.input_format_json_validate_types_from_metadata; format_settings.json.validate_utf8 = settings.output_format_json_validate_utf8; format_settings.json_object_each_row.column_for_object_name = settings.format_json_object_each_row_column_for_object_name; - format_settings.json.allow_object_type = context->getSettingsRef().allow_experimental_object_type; + format_settings.json.allow_deprecated_object_type = context->getSettingsRef().allow_experimental_object_type; + format_settings.json.allow_json_type = context->getSettingsRef().allow_experimental_json_type; format_settings.json.compact_allow_variable_number_of_columns = settings.input_format_json_compact_allow_variable_number_of_columns; format_settings.json.try_infer_objects_as_tuples = settings.input_format_json_try_infer_named_tuples_from_objects; format_settings.json.throw_on_bad_escape_sequence = settings.input_format_json_throw_on_bad_escape_sequence; format_settings.json.ignore_unnecessary_fields = settings.input_format_json_ignore_unnecessary_fields; + format_settings.json.type_json_skip_duplicated_paths = settings.type_json_skip_duplicated_paths; format_settings.null_as_default = settings.input_format_null_as_default; format_settings.force_null_for_omitted_fields = settings.input_format_force_null_for_omitted_fields; format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros; @@ -266,6 +268,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.try_infer_integers = settings.input_format_try_infer_integers; format_settings.try_infer_dates = settings.input_format_try_infer_dates; format_settings.try_infer_datetimes = settings.input_format_try_infer_datetimes; + format_settings.try_infer_datetimes_only_datetime64 = settings.input_format_try_infer_datetimes_only_datetime64; format_settings.try_infer_exponent_floats = settings.input_format_try_infer_exponent_floats; format_settings.markdown.escape_special_characters = settings.output_format_markdown_escape_special_characters; format_settings.bson.output_string_as_string = settings.output_format_bson_string_as_string; @@ -280,6 +283,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.max_parser_depth = context->getSettingsRef().max_parser_depth; format_settings.client_protocol_version = context->getClientProtocolVersion(); format_settings.date_time_overflow_behavior = settings.date_time_overflow_behavior; + format_settings.try_infer_variant = settings.input_format_try_infer_variants; /// Validate avro_schema_registry_url with RemoteHostFilter when non-empty and in Server context if (format_settings.schema.is_server) diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index f0359218775..ed178a68b9d 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -35,6 +35,7 @@ struct FormatSettings bool decimal_trailing_zeros = false; bool defaults_for_omitted_fields = true; bool is_writing_to_terminal = false; + bool try_infer_variant = false; bool seekable_read = true; UInt64 max_rows_to_read_for_schema_inference = 25000; @@ -46,6 +47,7 @@ struct FormatSettings bool try_infer_integers = true; bool try_infer_dates = true; bool try_infer_datetimes = true; + bool try_infer_datetimes_only_datetime64 = false; bool try_infer_exponent_floats = false; enum class DateTimeInputFormat : uint8_t @@ -227,13 +229,15 @@ struct FormatSettings bool try_infer_numbers_from_strings = false; bool validate_types_from_metadata = true; bool validate_utf8 = false; - bool allow_object_type = false; + bool allow_deprecated_object_type = false; + bool allow_json_type = false; bool valid_output_on_exception = false; bool compact_allow_variable_number_of_columns = false; bool try_infer_objects_as_tuples = false; bool infer_incomplete_types_as_strings = true; bool throw_on_bad_escape_sequence = true; bool ignore_unnecessary_fields = true; + bool type_json_skip_duplicated_paths = false; } json{}; struct diff --git a/src/Formats/JSONExtractTree.cpp b/src/Formats/JSONExtractTree.cpp index 242d2dc9f80..122224535a7 100644 --- a/src/Formats/JSONExtractTree.cpp +++ b/src/Formats/JSONExtractTree.cpp @@ -8,7 +8,6 @@ #if USE_RAPIDJSON #include #endif - #include #include @@ -22,6 +21,7 @@ #include #include #include +#include #include #include @@ -38,8 +38,10 @@ #include #include #include +#include #include #include +#include #include @@ -53,6 +55,7 @@ namespace DB namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int INCORRECT_DATA; } template @@ -123,7 +126,7 @@ void jsonElementToString(const typename JSONParser::Element & element, WriteBuff template bool tryGetNumericValueFromJSONElement( - NumberType & value, const typename JSONParser::Element & element, bool convert_bool_to_integer, String & error) + NumberType & value, const typename JSONParser::Element & element, bool convert_bool_to_integer, bool allow_type_conversion, String & error) { switch (element.type()) { @@ -135,7 +138,7 @@ bool tryGetNumericValueFromJSONElement( /// But it will be more convenient for user to perform conversion. value = static_cast(element.getDouble()); } - else if (!accurate::convertNumeric(element.getDouble(), value)) + else if (!allow_type_conversion || !accurate::convertNumeric(element.getDouble(), value)) { error = fmt::format("cannot convert double value {} to {}", element.getDouble(), TypeName); return false; @@ -158,7 +161,7 @@ bool tryGetNumericValueFromJSONElement( case ElementType::BOOL: if constexpr (is_integer) { - if (convert_bool_to_integer) + if (convert_bool_to_integer && allow_type_conversion) { value = static_cast(element.getBool()); break; @@ -166,13 +169,17 @@ bool tryGetNumericValueFromJSONElement( } error = fmt::format("cannot convert bool value to {}", TypeName); return false; - case ElementType::STRING: { + case ElementType::STRING: + { + if (!allow_type_conversion) + return false; + auto rb = ReadBufferFromMemory{element.getString()}; if constexpr (std::is_floating_point_v) { if (!tryReadFloatText(value, rb) || !rb.eof()) { - error = fmt::format("cannot parse {} value here: {}", TypeName, element.getString()); + error = fmt::format("cannot parse {} value here: \"{}\"", TypeName, element.getString()); return false; } } @@ -186,13 +193,13 @@ bool tryGetNumericValueFromJSONElement( rb.position() = rb.buffer().begin(); if (!tryReadFloatText(tmp_float, rb) || !rb.eof()) { - error = fmt::format("cannot parse {} value here: {}", TypeName, element.getString()); + error = fmt::format("cannot parse {} value here: \"{}\"", TypeName, element.getString()); return false; } if (!accurate::convertNumeric(tmp_float, value)) { - error = fmt::format("cannot parse {} value here: {}", TypeName, element.getString()); + error = fmt::format("cannot parse {} value here: \"{}\"", TypeName, element.getString()); return false; } } @@ -241,8 +248,16 @@ public: return false; } + if (is_bool_type && !insert_settings.allow_type_conversion) + { + if (!element.isBool()) + return false; + assert_cast &>(column).insertValue(element.getBool()); + return true; + } + NumberType value; - if (!tryGetNumericValueFromJSONElement(value, element, insert_settings.convert_bool_to_integer || is_bool_type, error)) + if (!tryGetNumericValueFromJSONElement(value, element, insert_settings.convert_bool_to_integer || is_bool_type, insert_settings.allow_type_conversion, error)) { if (error.empty()) error = fmt::format("cannot read {} value from JSON element: {}", TypeName, jsonElementToString(element, format_settings)); @@ -289,8 +304,17 @@ public: return false; } + if (this->is_bool_type && !insert_settings.allow_type_conversion) + { + if (!element.isBool()) + return false; + UInt8 value = element.getBool(); + assert_cast(column).insertData(reinterpret_cast(&value), sizeof(value)); + return true; + } + NumberType value; - if (!tryGetNumericValueFromJSONElement(value, element, insert_settings.convert_bool_to_integer || this->is_bool_type, error)) + if (!tryGetNumericValueFromJSONElement(value, element, insert_settings.convert_bool_to_integer || this->is_bool_type, insert_settings.allow_type_conversion, error)) { if (error.empty()) error = fmt::format("cannot read {} value from JSON element: {}", TypeName, jsonElementToString(element, format_settings)); @@ -316,7 +340,7 @@ public: bool insertResultToColumn( IColumn & column, const typename JSONParser::Element & element, - const JSONExtractInsertSettings &, + const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override { @@ -333,6 +357,9 @@ public: if (!element.isString()) { + if (!insert_settings.allow_type_conversion) + return false; + auto & col_str = assert_cast(column); auto & chars = col_str.getChars(); WriteBufferFromVector buf(chars, AppendModeTag()); @@ -360,7 +387,7 @@ public: bool insertResultToColumn( IColumn & column, const typename JSONParser::Element & element, - const JSONExtractInsertSettings &, + const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override { @@ -378,6 +405,9 @@ public: if (!element.isString()) { + if (!insert_settings.allow_type_conversion) + return false; + auto value = jsonElementToString(element, format_settings); assert_cast(column).insertData(value.data(), value.size()); } @@ -402,7 +432,7 @@ public: bool insertResultToColumn( IColumn & column, const typename JSONParser::Element & element, - const JSONExtractInsertSettings &, + const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override { @@ -419,7 +449,11 @@ public: } if (!element.isString()) + { + if (!insert_settings.allow_type_conversion) + return false; return checkValueSizeAndInsert(column, jsonElementToString(element, format_settings), error); + } return checkValueSizeAndInsert(column, element.getString(), error); } @@ -450,7 +484,7 @@ public: bool insertResultToColumn( IColumn & column, const typename JSONParser::Element & element, - const JSONExtractInsertSettings &, + const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override { @@ -466,7 +500,11 @@ public: } if (!element.isString()) + { + if (!insert_settings.allow_type_conversion) + return false; return checkValueSizeAndInsert(column, jsonElementToString(element, format_settings), error); + } return checkValueSizeAndInsert(column, element.getString(), error); } @@ -630,7 +668,7 @@ public: bool insertResultToColumn( IColumn & column, const typename JSONParser::Element & element, - const JSONExtractInsertSettings &, + const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override { @@ -649,7 +687,7 @@ public: return false; } } - else if (element.isUInt64()) + else if (element.isUInt64() && insert_settings.allow_type_conversion) { value = element.getUInt64(); } @@ -712,7 +750,8 @@ public: case ElementType::INT64: value = convertToDecimal, DataTypeDecimal>(element.getInt64(), scale); break; - case ElementType::STRING: { + case ElementType::STRING: + { auto rb = ReadBufferFromMemory{element.getString()}; if (!SerializationDecimal::tryReadText(value, rb, DecimalUtils::max_precision, scale)) { @@ -721,7 +760,8 @@ public: } break; } - case ElementType::NULL_VALUE: { + case ElementType::NULL_VALUE: + { if (!format_settings.null_as_default) { error = "cannot convert null to Decimal value"; @@ -756,7 +796,7 @@ public: bool insertResultToColumn( IColumn & column, const typename JSONParser::Element & element, - const JSONExtractInsertSettings &, + const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override { @@ -777,6 +817,9 @@ public: } else { + if (!insert_settings.allow_type_conversion) + return false; + switch (element.type()) { case ElementType::DOUBLE: @@ -1104,7 +1147,7 @@ public: } } - if (!were_valid_elements) + if (data.size() != old_size && !were_valid_elements) { data.popBack(data.size() - old_size); return false; @@ -1174,7 +1217,7 @@ public: else { set_size(old_size); - error += fmt::format("(during reading tuple {} element)", index); + error += fmt::format(" (during reading tuple {} element)", index); return false; } } @@ -1202,7 +1245,7 @@ public: else { set_size(old_size); - error += fmt::format("(during reading tuple {} element)", index); + error += fmt::format(" (during reading tuple {} element)", index); return false; } } @@ -1221,7 +1264,7 @@ public: else if (!insert_settings.insert_default_on_invalid_elements_in_complex_types) { set_size(old_size); - error += fmt::format("(during reading tuple element \"{}\")", key); + error += fmt::format(" (during reading tuple element \"{}\")", key); return false; } } @@ -1288,7 +1331,7 @@ public: { key_col.popBack(key_col.size() - offsets.back()); value_col.popBack(value_col.size() - offsets.back()); - error += fmt::format("(during reading value of key \"{}\")", pair.first); + error += fmt::format(" (during reading value of key \"{}\")", pair.first); return false; } } @@ -1346,6 +1389,13 @@ template class DynamicNode : public JSONExtractTreeNode { public: + explicit DynamicNode( + size_t max_dynamic_paths_for_object_ = DataTypeObject::DEFAULT_MAX_SEPARATELY_STORED_PATHS, + size_t max_dynamic_types_for_object_ = DataTypeDynamic::DEFAULT_MAX_DYNAMIC_TYPES) + : max_dynamic_paths_for_object(max_dynamic_paths_for_object_), max_dynamic_types_for_object(max_dynamic_types_for_object_) + { + } + bool insertResultToColumn( IColumn & column, const typename JSONParser::Element & element, @@ -1354,7 +1404,7 @@ public: String & error) const override { auto & column_dynamic = assert_cast(column); - /// First, check if element is NULL. + /// Check if element is NULL. if (element.isNull()) { column_dynamic.insertDefault(); @@ -1362,59 +1412,86 @@ public: } auto & variant_column = column_dynamic.getVariantColumn(); - auto variant_info = column_dynamic.getVariantInfo(); - /// Second, infer ClickHouse type for this element and add it as a new variant. - auto element_type = elementToDataType(element, format_settings); - if (column_dynamic.addNewVariant(element_type)) + const auto & variant_info = column_dynamic.getVariantInfo(); + const auto & variant_types = assert_cast(*variant_info.variant_type).getVariants(); + + /// Try to insert element into current variants but with no types conversion. + /// We want to avoid inferring the type on each row, so if we can insert this element into + /// any existing variant with no types conversion (like Integer -> String, Double -> Integer, etc) + /// we will do it and won't try to infer the type. + auto shared_variant_discr = column_dynamic.getSharedVariantDiscriminator(); + auto insert_settings_with_no_type_conversion = insert_settings; + insert_settings_with_no_type_conversion.allow_type_conversion = false; + for (size_t i = 0; i != variant_info.variant_names.size(); ++i) { - auto node = buildJSONExtractTree(element_type, "Dynamic inference"); - auto global_discriminator = variant_info.variant_name_to_discriminator[element_type->getName()]; + if (i != shared_variant_discr) + { + auto it = json_extract_nodes_cache.find(variant_info.variant_names[i]); + if (it == json_extract_nodes_cache.end()) + it = json_extract_nodes_cache.emplace(variant_info.variant_names[i], buildJSONExtractTree(variant_types[i], "Dynamic inference")).first; + + if (it->second->insertResultToColumn(variant_column.getVariantByGlobalDiscriminator(i), element, insert_settings_with_no_type_conversion, format_settings, error)) + { + variant_column.getLocalDiscriminators().push_back(variant_column.localDiscriminatorByGlobal(i)); + variant_column.getOffsets().push_back(variant_column.getVariantByGlobalDiscriminator(i).size() - 1); + return true; + } + } + } + + /// We couldn't insert element into current variants, infer ClickHouse type for this element and add it as a new variant. + auto element_type = removeNullable(elementToDataType(element, format_settings)); + if (!checkIfTypeIsComplete(element_type)) + { + throw Exception( + ErrorCodes::INCORRECT_DATA, + "Cannot infer the type of JSON element {}, because it contains only nulls. To use String type for elements with incomplete " + "type, enable setting input_format_json_infer_incomplete_types_as_strings", + jsonElementToString(element, format_settings)); + } + + auto element_type_name = element_type->getName(); + if (column_dynamic.addNewVariant(element_type, element_type_name)) + { + auto it = json_extract_nodes_cache.find(element_type_name); + if (it == json_extract_nodes_cache.end()) + it = json_extract_nodes_cache.emplace(element_type_name, buildJSONExtractTree(element_type, "Dynamic inference")).first; + auto global_discriminator = variant_info.variant_name_to_discriminator.at(element_type_name); auto & variant = variant_column.getVariantByGlobalDiscriminator(global_discriminator); - if (!node->insertResultToColumn(variant, element, insert_settings, format_settings, error)) + if (!it->second->insertResultToColumn(variant, element, insert_settings, format_settings, error)) return false; variant_column.getLocalDiscriminators().push_back(variant_column.localDiscriminatorByGlobal(global_discriminator)); variant_column.getOffsets().push_back(variant.size() - 1); return true; } - /// We couldn't add new variant. Try to insert element into current variants. - auto variant_node = buildJSONExtractTree(variant_info.variant_type, "Dynamic inference"); - if (variant_node->insertResultToColumn(variant_column, element, insert_settings, format_settings, error)) - return true; - - /// We couldn't insert element into any existing variant, add String variant and read value as String. - column_dynamic.addStringVariant(); - auto string_global_discriminator = variant_info.variant_name_to_discriminator["String"]; - auto & string_column = variant_column.getVariantByGlobalDiscriminator(string_global_discriminator); - if (!getStringNode()->insertResultToColumn(string_column, element, insert_settings, format_settings, error)) + /// We couldn't add this variant, insert it into shared variant. + auto tmp_variant_column = element_type->createColumn(); + auto node = buildJSONExtractTree(element_type, "Dynamic inference"); + if (!node->insertResultToColumn(*tmp_variant_column, element, insert_settings, format_settings, error)) return false; - variant_column.getLocalDiscriminators().push_back(variant_column.localDiscriminatorByGlobal(string_global_discriminator)); - variant_column.getOffsets().push_back(string_column.size() - 1); + + column_dynamic.insertValueIntoSharedVariant(*tmp_variant_column, element_type, element_type_name, 0); return true; } - static const std::unique_ptr> & getStringNode() - { - static const std::unique_ptr> string_node - = buildJSONExtractTree(std::make_shared(), "Dynamic inference"); - return string_node; - } - - static DataTypePtr elementToDataType(const typename JSONParser::Element & element, const FormatSettings & format_settings) + DataTypePtr elementToDataType(const typename JSONParser::Element & element, const FormatSettings & format_settings) const { JSONInferenceInfo json_inference_info; auto type = elementToDataTypeImpl(element, format_settings, json_inference_info); transformFinalInferredJSONTypeIfNeeded(type, format_settings, &json_inference_info); + if (format_settings.schema_inference_make_columns_nullable && type->haveSubtypes()) + type = makeNullableRecursively(type); return type; } private: - static DataTypePtr elementToDataTypeImpl(const typename JSONParser::Element & element, const FormatSettings & format_settings, JSONInferenceInfo & json_inference_info) + DataTypePtr elementToDataTypeImpl(const typename JSONParser::Element & element, const FormatSettings & format_settings, JSONInferenceInfo & json_inference_info) const { switch (element.type()) { case ElementType::NULL_VALUE: - return makeNullable(std::make_shared()); + return std::make_shared(std::make_shared()); case ElementType::BOOL: return DataTypeFactory::instance().get("Bool"); case ElementType::INT64: @@ -1452,10 +1529,10 @@ private: DataTypes types; types.reserve(array.size()); for (auto value : array) - types.push_back(makeNullableSafe(elementToDataTypeImpl(value, format_settings, json_inference_info))); + types.push_back(elementToDataTypeImpl(value, format_settings, json_inference_info)); if (types.empty()) - return std::make_shared(makeNullable(std::make_shared())); + return std::make_shared(std::make_shared()); if (checkIfTypesAreEqual(types)) return std::make_shared(types.back()); @@ -1482,12 +1559,238 @@ private: return std::make_shared(types); } - case ElementType::OBJECT: { - /// TODO: Use new JSON type here when it's ready. - return std::make_shared(std::make_shared(), makeNullable(std::make_shared())); + case ElementType::OBJECT: + { + return std::make_shared(DataTypeObject::SchemaFormat::JSON, max_dynamic_paths_for_object, max_dynamic_types_for_object); } } } + + size_t max_dynamic_paths_for_object; + size_t max_dynamic_types_for_object; + + /// Avoid building JSONExtractTreeNode for the same data types on each row by using cache. + mutable std::unordered_map>> json_extract_nodes_cache; +}; + +template +class ObjectJSONNode : public JSONExtractTreeNode +{ +public: + ObjectJSONNode( + std::unordered_map>> typed_path_nodes_, + const std::unordered_set & paths_to_skip_, + const std::vector & path_regexps_to_skip_, + size_t max_dynamic_paths_, + size_t max_dynamic_types_) + : typed_path_nodes(std::move(typed_path_nodes_)) + , paths_to_skip(paths_to_skip_) + , dynamic_node(std::make_unique>( + max_dynamic_paths_ / DataTypeObject::NESTED_OBJECT_MAX_DYNAMIC_PATHS_REDUCE_FACTOR, + max_dynamic_types_ / DataTypeObject::NESTED_OBJECT_MAX_DYNAMIC_TYPES_REDUCE_FACTOR)) + , dynamic_serialization(std::make_shared()) + { + sorted_paths_to_skip.assign(paths_to_skip.begin(), paths_to_skip.end()); + std::sort(sorted_paths_to_skip.begin(), sorted_paths_to_skip.end()); + for (const auto & regexp : path_regexps_to_skip_) + path_regexps_to_skip.emplace_back(regexp); + } + + bool insertResultToColumn(IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override + { + if (element.isNull() && format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + if (!element.isObject()) + { + error = fmt::format("Cannot read JSON object from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + + auto & column_object = assert_cast(column); + size_t prev_size = column_object.size(); + + /// Paths in shared data should be sorted, so we cannot insert paths there during traverse. + /// Instead we collect all paths and values that should go to shared data, sort them and insert later. + /// It's not optimal, but it's a price we pay for faster reading of subcolumns. + std::vector> paths_and_values_for_shared_data; + if (!traverseAndInsert(column_object, element, "", insert_settings, format_settings, paths_and_values_for_shared_data, prev_size, error)) + { + /// If there was an error, restore previous state. + SerializationObject::restoreColumnObject(column_object, prev_size); + return false; + } + + /// Fill shared data. + auto [shared_data_paths, shared_data_values] = column_object.getSharedDataPathsAndValues(); + std::sort(paths_and_values_for_shared_data.begin(), paths_and_values_for_shared_data.end()); + for (size_t i = 0; i != paths_and_values_for_shared_data.size(); ++i) + { + const auto & [path, value] = paths_and_values_for_shared_data[i]; + /// Check if we duplicated paths. + if (i != 0 && path == paths_and_values_for_shared_data[i - 1].first) + { + if (!format_settings.json.type_json_skip_duplicated_paths) + { + error = fmt::format("Duplicate path found during parsing JSON object: {}. You can enable setting type_json_skip_duplicated_paths to skip duplicated paths during insert", path); + SerializationObject::restoreColumnObject(column_object, prev_size); + return false; + } + } + else + { + shared_data_paths->insertData(path.data(), path.size()); + shared_data_values->insertData(value.data(), value.size()); + } + } + column_object.getSharedDataOffsets().push_back(shared_data_paths->size()); + + /// Fill remaining typed and dynamic paths. + for (auto & [_, typed_column] : column_object.getTypedPaths()) + { + if (typed_column->size() == prev_size) + typed_column->insertDefault(); + } + + for (auto & [_, dynamic_column] : column_object.getDynamicPathsPtrs()) + { + if (dynamic_column->size() == prev_size) + dynamic_column->insertDefault(); + } + + return true; + } + +private: + bool traverseAndInsert( + ColumnObject & column_object, + const typename JSONParser::Element & element, + const String & current_path, + const JSONExtractInsertSettings & insert_settings, + const FormatSettings & format_settings, + std::vector> & paths_and_values_for_shared_data, + size_t current_size, + String & error) const + { + if (shouldSkipPath(current_path)) + return true; + + if (element.isObject() && !typed_path_nodes.contains(current_path)) + { + for (auto [key, value] : element.getObject()) + { + String path = current_path; + if (!path.empty()) + path.append("."); + path += key; + if (!traverseAndInsert(column_object, value, path, insert_settings, format_settings, paths_and_values_for_shared_data, current_size, error)) + return false; + } + + return true; + } + + auto & typed_paths = column_object.getTypedPaths(); + auto & dynamic_paths_ptrs = column_object.getDynamicPathsPtrs(); + /// Check if we have this path in typed paths. + if (auto typed_it = typed_paths.find(current_path); typed_it != typed_paths.end()) + { + /// Check if we already had this path. + if (typed_it->second->size() > current_size) + { + if (!format_settings.json.type_json_skip_duplicated_paths) + { + error = fmt::format("Duplicate path found during parsing JSON object: {}. You can enable setting type_json_skip_duplicated_paths to skip duplicated paths during insert", current_path); + return false; + } + } + else if (!typed_path_nodes.at(current_path)->insertResultToColumn(*typed_it->second, element, insert_settings, format_settings, error)) + { + error += fmt::format(" (while reading path {})", current_path); + return false; + } + } + /// Check if we have this path in dynamic paths. + else if (auto dynamic_it = dynamic_paths_ptrs.find(current_path); dynamic_it != dynamic_paths_ptrs.end()) + { + /// Check if we already had this path. + if (dynamic_it->second->size() > current_size) + { + if (!format_settings.json.type_json_skip_duplicated_paths) + { + error = fmt::format("Duplicate path found during parsing JSON object: {}. You can enable setting type_json_skip_duplicated_paths to skip duplicated paths during insert", current_path); + return false; + } + } + else if (!dynamic_node->insertResultToColumn(*dynamic_it->second, element, insert_settings, format_settings, error)) + { + error += fmt::format(" (while reading path {})", current_path); + return false; + } + } + /// Don't create new dynamic paths for null and don't insert null values into shared data. + /// We consider null equivalent to the absence of this path. + else if (element.isNull()) + { + } + /// Try to add a new dynamic path. + else if (auto * dynamic_column = column_object.tryToAddNewDynamicPath(current_path)) + { + if (!dynamic_node->insertResultToColumn(*dynamic_column, element, insert_settings, format_settings, error)) + { + error += fmt::format(" (while reading path {})", current_path); + return false; + } + } + /// Otherwise this path should go to the shared data. + else + { + auto tmp_dynamic_column = ColumnDynamic::create(); + tmp_dynamic_column->reserve(1); + if (!dynamic_node->insertResultToColumn(*tmp_dynamic_column, element, insert_settings, format_settings, error)) + { + error += fmt::format(" (while reading path {})", current_path); + return false; + } + + paths_and_values_for_shared_data.emplace_back(current_path, ""); + WriteBufferFromString buf(paths_and_values_for_shared_data.back().second); + dynamic_serialization->serializeBinary(*tmp_dynamic_column, 0, buf, format_settings); + } + + return true; + } + + bool shouldSkipPath(const String & path) const + { + if (paths_to_skip.contains(path)) + return true; + + if (!sorted_paths_to_skip.empty()) + { + auto it = std::lower_bound(sorted_paths_to_skip.begin(), sorted_paths_to_skip.end(), path); + if (it != sorted_paths_to_skip.begin() && path.starts_with(*std::prev(it))) + return true; + } + + for (const auto & regexp : path_regexps_to_skip) + { + if (re2::RE2::FullMatch(path, regexp)) + return true; + } + + return false; + } + + std::unordered_map>> typed_path_nodes; + std::unordered_set paths_to_skip; + std::vector sorted_paths_to_skip; + std::list path_regexps_to_skip; + std::unique_ptr> dynamic_node; + std::shared_ptr dynamic_serialization; }; } @@ -1634,6 +1937,26 @@ std::unique_ptr> buildJSONExtractTree(const Data } case TypeIndex::Dynamic: return std::make_unique>(); + case TypeIndex::Object: + { + const auto & object_type = assert_cast(*type); + const auto & typed_paths = object_type.getTypedPaths(); + std::unordered_map>> typed_path_nodes; + typed_path_nodes.reserve(typed_paths.size()); + for (const auto & [path, path_type] : typed_paths) + typed_path_nodes[path] = buildJSONExtractTree(path_type, source_for_exception_message); + + switch (object_type.getSchemaFormat()) + { + case DataTypeObject::SchemaFormat::JSON: + return std::make_unique>( + std::move(typed_path_nodes), + object_type.getPathsToSkip(), + object_type.getPathRegexpsToSkip(), + object_type.getMaxDynamicPaths(), + object_type.getMaxDynamicTypes()); + } + } default: throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, @@ -1651,7 +1974,7 @@ template std::unique_ptr> buildJSONExtractTr #if USE_RAPIDJSON template void jsonElementToString(const RapidJSONParser::Element & element, WriteBuffer & buf, const FormatSettings & format_settings); template std::unique_ptr> buildJSONExtractTree(const DataTypePtr & type, const char * source_for_exception_message); -template bool tryGetNumericValueFromJSONElement(Float64 & value, const RapidJSONParser::Element & element, bool convert_bool_to_integer, String & error); +template bool tryGetNumericValueFromJSONElement(Float64 & value, const RapidJSONParser::Element & element, bool convert_bool_to_integer, bool allow_type_conversion, String & error); #else template void jsonElementToString(const DummyJSONParser::Element & element, WriteBuffer & buf, const FormatSettings & format_settings); template std::unique_ptr> buildJSONExtractTree(const DataTypePtr & type, const char * source_for_exception_message); diff --git a/src/Formats/JSONExtractTree.h b/src/Formats/JSONExtractTree.h index b5e82506548..89f2d191dfb 100644 --- a/src/Formats/JSONExtractTree.h +++ b/src/Formats/JSONExtractTree.h @@ -17,6 +17,9 @@ struct JSONExtractInsertSettings /// For example, if we have [1, "hello", 2] and type Array(UInt32), /// we will insert [1, 0, 2] in the column. Used in all JSONExtract functions. bool insert_default_on_invalid_elements_in_complex_types = false; + /// If false, JSON value will be inserted into column only if type of the value is + /// the same as column type (no conversions like Integer -> String, Integer -> Float, etc). + bool allow_type_conversion = true; }; template @@ -36,6 +39,6 @@ template void jsonElementToString(const typename JSONParser::Element & element, WriteBuffer & buf, const FormatSettings & format_settings); template -bool tryGetNumericValueFromJSONElement(NumberType & value, const typename JSONParser::Element & element, bool convert_bool_to_integer, String & error); +bool tryGetNumericValueFromJSONElement(NumberType & value, const typename JSONParser::Element & element, bool convert_bool_to_integer, bool allow_type_conversion, String & error); } diff --git a/src/Formats/JSONUtils.cpp b/src/Formats/JSONUtils.cpp index f0985f4a6b7..9d898cd2470 100644 --- a/src/Formats/JSONUtils.cpp +++ b/src/Formats/JSONUtils.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include @@ -487,6 +487,8 @@ namespace JSONUtils size_t rows, size_t rows_before_limit, bool applied_limit, + size_t rows_before_aggregation, + bool applied_aggregation, const Stopwatch & watch, const Progress & progress, bool write_statistics, @@ -502,7 +504,12 @@ namespace JSONUtils writeTitle("rows_before_limit_at_least", out, 1, " "); writeIntText(rows_before_limit, out); } - + if (applied_aggregation) + { + writeFieldDelimiter(out, 2); + writeTitle("rows_before_aggregation", out, 1, " "); + writeIntText(rows_before_aggregation, out); + } if (write_statistics) { writeFieldDelimiter(out, 2); diff --git a/src/Formats/JSONUtils.h b/src/Formats/JSONUtils.h index 7ee111c1285..e2ac3467971 100644 --- a/src/Formats/JSONUtils.h +++ b/src/Formats/JSONUtils.h @@ -104,6 +104,8 @@ namespace JSONUtils size_t rows, size_t rows_before_limit, bool applied_limit, + size_t rows_before_aggregation, + bool applied_aggregation, const Stopwatch & watch, const Progress & progress, bool write_statistics, diff --git a/src/Formats/ReadSchemaUtils.cpp b/src/Formats/ReadSchemaUtils.cpp index 617595c19a2..dbbd728ed72 100644 --- a/src/Formats/ReadSchemaUtils.cpp +++ b/src/Formats/ReadSchemaUtils.cpp @@ -164,7 +164,7 @@ try return {*iterator_data.cached_columns, *format_name}; } - schemas_for_union_mode.emplace_back(iterator_data.cached_columns->getAll(), read_buffer_iterator.getLastFileName()); + schemas_for_union_mode.emplace_back(iterator_data.cached_columns->getAll(), read_buffer_iterator.getLastFilePath()); continue; } @@ -250,7 +250,7 @@ try if (!names_and_types.empty()) read_buffer_iterator.setSchemaToLastFile(ColumnsDescription(names_and_types)); - schemas_for_union_mode.emplace_back(names_and_types, read_buffer_iterator.getLastFileName()); + schemas_for_union_mode.emplace_back(names_and_types, read_buffer_iterator.getLastFilePath()); } catch (...) { @@ -411,7 +411,7 @@ try throw Exception(ErrorCodes::CANNOT_DETECT_FORMAT, "The data format cannot be detected by the contents of the files. You can specify the format manually"); read_buffer_iterator.setSchemaToLastFile(ColumnsDescription(names_and_types)); - schemas_for_union_mode.emplace_back(names_and_types, read_buffer_iterator.getLastFileName()); + schemas_for_union_mode.emplace_back(names_and_types, read_buffer_iterator.getLastFilePath()); } if (format_name && mode == SchemaInferenceMode::DEFAULT) @@ -527,9 +527,9 @@ try } catch (Exception & e) { - auto file_name = read_buffer_iterator.getLastFileName(); - if (!file_name.empty()) - e.addMessage(fmt::format("(in file/uri {})", file_name)); + auto file_path = read_buffer_iterator.getLastFilePath(); + if (!file_path.empty()) + e.addMessage(fmt::format("(in file/uri {})", file_path)); throw; } diff --git a/src/Formats/ReadSchemaUtils.h b/src/Formats/ReadSchemaUtils.h index bb5e068f696..7168e7f0817 100644 --- a/src/Formats/ReadSchemaUtils.h +++ b/src/Formats/ReadSchemaUtils.h @@ -56,8 +56,8 @@ struct IReadBufferIterator /// Set auto detected format name. virtual void setFormatName(const String & /*format_name*/) {} - /// Get last processed file name for better exception messages. - virtual String getLastFileName() const { return ""; } + /// Get last processed file path for better exception messages. + virtual String getLastFilePath() const { return ""; } /// Return true if method recreateLastReadBuffer is implemented. virtual bool supportsLastReadBufferRecreation() const { return false; } diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 3c374ada9e6..e8eab3b4453 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -7,11 +7,12 @@ #include #include #include +#include #include #include #include #include -#include +#include #include #include #include @@ -306,37 +307,72 @@ namespace type_indexes.erase(TypeIndex::UInt64); } - /// If we have only Date and DateTime types, convert Date to DateTime, - /// otherwise, convert all Date and DateTime to String. + /// if setting 'try_infer_variant' is true then we convert to type variant. + void transformVariant(DataTypes & data_types, TypeIndexesSet & type_indexes) + { + if (checkIfTypesAreEqual(data_types)) + return; + + DataTypes variant_types; + for (const auto & type : data_types) + { + if (const auto * variant_type = typeid_cast(type.get())) + { + const auto & current_variants = variant_type->getVariants(); + variant_types.insert(variant_types.end(), current_variants.begin(), current_variants.end()); + } + else + { + variant_types.push_back(type); + } + } + + auto variant_type = std::make_shared(variant_types); + + for (auto & type : data_types) + type = variant_type; + type_indexes = {TypeIndex::Variant}; + } + + /// If we have only date/datetimes types (Date/DateTime/DateTime64), convert all of them to the common type, + /// otherwise, convert all Date, DateTime and DateTime64 to String. void transformDatesAndDateTimes(DataTypes & data_types, TypeIndexesSet & type_indexes) { bool have_dates = type_indexes.contains(TypeIndex::Date); - bool have_datetimes = type_indexes.contains(TypeIndex::DateTime64); - bool all_dates_or_datetimes = (type_indexes.size() == (static_cast(have_dates) + static_cast(have_datetimes))); + bool have_datetimes = type_indexes.contains(TypeIndex::DateTime); + bool have_datetimes64 = type_indexes.contains(TypeIndex::DateTime64); + bool all_dates_or_datetimes = (type_indexes.size() == (static_cast(have_dates) + static_cast(have_datetimes) + static_cast(have_datetimes64))); - if (!all_dates_or_datetimes && (have_dates || have_datetimes)) + if (!all_dates_or_datetimes && (have_dates || have_datetimes || have_datetimes64)) { for (auto & type : data_types) { - if (isDate(type) || isDateTime64(type)) + if (isDate(type) || isDateTime(type) || isDateTime64(type)) type = std::make_shared(); } type_indexes.erase(TypeIndex::Date); type_indexes.erase(TypeIndex::DateTime); + type_indexes.erase(TypeIndex::DateTime64); type_indexes.insert(TypeIndex::String); return; } - if (have_dates && have_datetimes) + for (auto & type : data_types) { - for (auto & type : data_types) + if (isDate(type) && (have_datetimes || have_datetimes64)) { - if (isDate(type)) + if (have_datetimes64) type = std::make_shared(9); + else + type = std::make_shared(); + type_indexes.erase(TypeIndex::Date); + } + else if (isDateTime(type) && have_datetimes64) + { + type = std::make_shared(9); + type_indexes.erase(TypeIndex::DateTime); } - - type_indexes.erase(TypeIndex::Date); } } @@ -644,7 +680,11 @@ namespace transformDatesAndDateTimes(data_types, type_indexes); if constexpr (!is_json) + { + if (settings.try_infer_variant) + transformVariant(data_types, type_indexes); return; + } /// Check settings specific for JSON formats. @@ -662,6 +702,10 @@ namespace if (settings.json.try_infer_objects_as_tuples) mergeJSONPaths(data_types, type_indexes, settings, json_info); + + if (settings.try_infer_variant) + transformVariant(data_types, type_indexes); + }; auto transform_complex_types = [&](DataTypes & data_types, TypeIndexesSet & type_indexes) @@ -674,7 +718,11 @@ namespace transformNothingComplexTypes(data_types, type_indexes); if constexpr (!is_json) + { + if (settings.try_infer_variant) + transformVariant(data_types, type_indexes); return; + } /// Convert JSON tuples with same nested types to arrays. transformTuplesWithEqualNestedTypesToArrays(data_types, type_indexes); @@ -687,6 +735,9 @@ namespace if (json_info && json_info->allow_merging_named_tuples) mergeNamedTuples(data_types, type_indexes, settings, json_info); + + if (settings.try_infer_variant) + transformVariant(data_types, type_indexes); }; transformTypesRecursively(types, transform_simple_types, transform_complex_types); @@ -697,55 +748,87 @@ namespace bool tryInferDate(std::string_view field) { - if (field.empty()) + /// Minimum length of Date text representation is 8 (YYYY-M-D) and maximum is 10 (YYYY-MM-DD) + if (field.size() < 8 || field.size() > 10) return false; - ReadBufferFromString buf(field); - Float64 tmp_float; /// Check if it's just a number, and if so, don't try to infer Date from it, /// because we can interpret this number as a Date (for example 20000101 will be 2000-01-01) /// and it will lead to inferring Date instead of simple Int64/UInt64 in some cases. - if (tryReadFloatText(tmp_float, buf) && buf.eof()) - return false; - - buf.seek(0, SEEK_SET); /// Return position to the beginning - - DayNum tmp; - return tryReadDateText(tmp, buf) && buf.eof(); - } - - bool tryInferDateTime(std::string_view field, const FormatSettings & settings) - { - if (field.empty()) + if (std::all_of(field.begin(), field.end(), isNumericASCII)) return false; ReadBufferFromString buf(field); - Float64 tmp_float; + DayNum tmp; + return tryReadDateText(tmp, buf, DateLUT::instance(), /*allowed_delimiters=*/"-/:") && buf.eof(); + } + + DataTypePtr tryInferDateTimeOrDateTime64(std::string_view field, const FormatSettings & settings) + { + /// Don't try to infer DateTime if string is too long. + /// It's difficult to say what is the real maximum length of + /// DateTime we can parse using BestEffort approach. + /// 50 symbols is more or less valid limit for date times that makes sense. + if (field.empty() || field.size() > 50) + return nullptr; + + /// Check that we have at least one digit, don't infer datetime form strings like "Apr"/"May"/etc. + if (!std::any_of(field.begin(), field.end(), isNumericASCII)) + return nullptr; + /// Check if it's just a number, and if so, don't try to infer DateTime from it, /// because we can interpret this number as a timestamp and it will lead to - /// inferring DateTime instead of simple Int64/Float64 in some cases. + /// inferring DateTime instead of simple Int64 in some cases. + if (std::all_of(field.begin(), field.end(), isNumericASCII)) + return nullptr; + + ReadBufferFromString buf(field); + Float64 tmp_float; + /// Check if it's a float value, and if so, don't try to infer DateTime from it, + /// because it will lead to inferring DateTime instead of simple Float64 in some cases. if (tryReadFloatText(tmp_float, buf) && buf.eof()) - return false; + return nullptr; + + buf.seek(0, SEEK_SET); /// Return position to the beginning + if (!settings.try_infer_datetimes_only_datetime64) + { + time_t tmp; + switch (settings.date_time_input_format) + { + case FormatSettings::DateTimeInputFormat::Basic: + if (tryReadDateTimeText(tmp, buf, DateLUT::instance(), /*allowed_date_delimiters=*/"-/:", /*allowed_time_delimiters=*/":") && buf.eof()) + return std::make_shared(); + break; + case FormatSettings::DateTimeInputFormat::BestEffort: + if (tryParseDateTimeBestEffortStrict(tmp, buf, DateLUT::instance(), DateLUT::instance("UTC"), /*allowed_date_delimiters=*/"-/:") && buf.eof()) + return std::make_shared(); + break; + case FormatSettings::DateTimeInputFormat::BestEffortUS: + if (tryParseDateTimeBestEffortUSStrict(tmp, buf, DateLUT::instance(), DateLUT::instance("UTC"), /*allowed_date_delimiters=*/"-/:") && buf.eof()) + return std::make_shared(); + break; + } + } buf.seek(0, SEEK_SET); /// Return position to the beginning DateTime64 tmp; switch (settings.date_time_input_format) { case FormatSettings::DateTimeInputFormat::Basic: - if (tryReadDateTime64Text(tmp, 9, buf) && buf.eof()) - return true; + if (tryReadDateTime64Text(tmp, 9, buf, DateLUT::instance(), /*allowed_date_delimiters=*/"-/:", /*allowed_time_delimiters=*/":") && buf.eof()) + return std::make_shared(9); break; case FormatSettings::DateTimeInputFormat::BestEffort: - if (tryParseDateTime64BestEffort(tmp, 9, buf, DateLUT::instance(), DateLUT::instance("UTC")) && buf.eof()) - return true; + if (tryParseDateTime64BestEffortStrict(tmp, 9, buf, DateLUT::instance(), DateLUT::instance("UTC"), /*allowed_date_delimiters=*/"-/:") && buf.eof()) + return std::make_shared(9); break; case FormatSettings::DateTimeInputFormat::BestEffortUS: - if (tryParseDateTime64BestEffortUS(tmp, 9, buf, DateLUT::instance(), DateLUT::instance("UTC")) && buf.eof()) - return true; + if (tryParseDateTime64BestEffortUSStrict(tmp, 9, buf, DateLUT::instance(), DateLUT::instance("UTC"), /*allowed_date_delimiters=*/"-/:") && buf.eof()) + return std::make_shared(9); break; } - return false; + return nullptr; } template @@ -821,7 +904,6 @@ namespace if (checkIfTypesAreEqual(nested_types_copy)) return std::make_shared(nested_types_copy.back()); - return std::make_shared(nested_types); } else @@ -1176,8 +1258,8 @@ namespace { if constexpr (is_json) { - if (settings.json.allow_object_type) - return std::make_shared("json", true); + if (settings.json.allow_deprecated_object_type) + return std::make_shared("json", true); } /// Empty Map is Map(Nothing, Nothing) @@ -1186,8 +1268,8 @@ namespace if constexpr (is_json) { - if (settings.json.allow_object_type) - return std::make_shared("json", true); + if (settings.json.allow_deprecated_object_type) + return std::make_shared("json", true); if (settings.json.read_objects_as_strings) return std::make_shared(); @@ -1242,7 +1324,7 @@ namespace { if constexpr (is_json) { - if (!settings.json.allow_object_type && settings.json.try_infer_objects_as_tuples) + if (!settings.json.allow_deprecated_object_type && settings.json.try_infer_objects_as_tuples) return tryInferJSONPaths(buf, settings, json_info, depth); } @@ -1262,7 +1344,7 @@ namespace if (checkCharCaseInsensitive('n', buf)) { if (checkStringCaseInsensitive("ull", buf)) - return makeNullable(std::make_shared()); + return std::make_shared(std::make_shared()); else if (checkStringCaseInsensitive("an", buf)) return std::make_shared(); } @@ -1416,6 +1498,15 @@ void transformFinalInferredJSONTypeIfNeededImpl(DataTypePtr & data_type, const F return; } + + if (const auto * variant_type = typeid_cast(data_type.get())) + { + auto nested_types = variant_type->getVariants(); + for (auto & nested_type : nested_types) + transformFinalInferredJSONTypeIfNeededImpl(nested_type, settings, json_info, remain_nothing_types); + data_type = std::make_shared(nested_types); + return; + } } void transformFinalInferredJSONTypeIfNeeded(DataTypePtr & data_type, const FormatSettings & settings, JSONInferenceInfo * json_info) @@ -1439,8 +1530,11 @@ DataTypePtr tryInferDateOrDateTimeFromString(std::string_view field, const Forma if (settings.try_infer_dates && tryInferDate(field)) return std::make_shared(); - if (settings.try_infer_datetimes && tryInferDateTime(field, settings)) - return std::make_shared(9); + if (settings.try_infer_datetimes) + { + if (auto type = tryInferDateTimeOrDateTime64(field, settings)) + return type; + } return nullptr; } @@ -1492,6 +1586,20 @@ DataTypePtr makeNullableRecursively(DataTypePtr type) return nested_type ? std::make_shared(nested_type) : nullptr; } + if (which.isVariant()) + { + const auto * variant_type = assert_cast(type.get()); + DataTypes nested_types; + for (const auto & nested_type: variant_type->getVariants()) + { + if (!nested_type->lowCardinality() && nested_type->haveSubtypes()) + nested_types.push_back(makeNullableRecursively(nested_type)); + else + nested_types.push_back(nested_type); + } + return std::make_shared(nested_types); + } + if (which.isTuple()) { const auto * tuple_type = assert_cast(type.get()); @@ -1525,15 +1633,15 @@ DataTypePtr makeNullableRecursively(DataTypePtr type) return nested_type ? std::make_shared(nested_type) : nullptr; } - if (which.isObject()) + if (which.isObjectDeprecated()) { - const auto * object_type = assert_cast(type.get()); + const auto * object_type = assert_cast(type.get()); if (object_type->hasNullableSubcolumns()) return type; - return std::make_shared(object_type->getSchemaFormat(), true); + return std::make_shared(object_type->getSchemaFormat(), true); } - return makeNullable(type); + return makeNullableSafe(type); } NamesAndTypesList getNamesAndRecursivelyNullableTypes(const Block & header) diff --git a/src/Formats/fuzzers/CMakeLists.txt b/src/Formats/fuzzers/CMakeLists.txt index 99e57bfbca3..b8a7e78b6e2 100644 --- a/src/Formats/fuzzers/CMakeLists.txt +++ b/src/Formats/fuzzers/CMakeLists.txt @@ -1,2 +1,2 @@ clickhouse_add_executable(format_fuzzer format_fuzzer.cpp ${SRCS}) -target_link_libraries(format_fuzzer PRIVATE dbms clickhouse_aggregate_functions clickhouse_functions) +target_link_libraries(format_fuzzer PRIVATE clickhouse_functions clickhouse_aggregate_functions) diff --git a/src/Formats/fuzzers/format_fuzzer.cpp b/src/Formats/fuzzers/format_fuzzer.cpp index 408e7218221..12cd40f9442 100644 --- a/src/Formats/fuzzers/format_fuzzer.cpp +++ b/src/Formats/fuzzers/format_fuzzer.cpp @@ -3,7 +3,6 @@ #include #include -#include #include #include diff --git a/src/Functions/CastOverloadResolver.cpp b/src/Functions/CastOverloadResolver.cpp index 49f63073aaf..6cb4d492fd8 100644 --- a/src/Functions/CastOverloadResolver.cpp +++ b/src/Functions/CastOverloadResolver.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -35,7 +36,7 @@ FunctionBasePtr createFunctionBaseCast( class CastOverloadResolverImpl : public IFunctionOverloadResolver { public: - const char * getNameImpl() const + static const char * getNameImpl(CastType cast_type, bool internal) { if (cast_type == CastType::accurate) return "accurateCast"; @@ -49,7 +50,7 @@ public: String getName() const override { - return getNameImpl(); + return getNameImpl(cast_type, internal); } size_t getNumberOfArguments() const override { return 2; } @@ -79,10 +80,22 @@ public: } } + static FunctionBasePtr createInternalCast(ColumnWithTypeAndName from, DataTypePtr to, CastType cast_type, std::optional diagnostic) + { + if (cast_type == CastType::accurateOrNull && !isVariant(to)) + to = makeNullable(to); + + ColumnsWithTypeAndName arguments; + arguments.emplace_back(std::move(from)); + arguments.emplace_back().type = std::make_unique(); + + return createFunctionBaseCast(nullptr, getNameImpl(cast_type, true), arguments, to, diagnostic, cast_type); + } + protected: FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override { - return createFunctionBaseCast(context, getNameImpl(), arguments, return_type, diagnostic, cast_type); + return createFunctionBaseCast(context, getNameImpl(cast_type, internal), arguments, return_type, diagnostic, cast_type); } DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override @@ -130,9 +143,9 @@ private: }; -FunctionOverloadResolverPtr createInternalCastOverloadResolver(CastType type, std::optional diagnostic) +FunctionBasePtr createInternalCast(ColumnWithTypeAndName from, DataTypePtr to, CastType cast_type, std::optional diagnostic) { - return CastOverloadResolverImpl::create(ContextPtr{}, type, true, diagnostic); + return CastOverloadResolverImpl::createInternalCast(std::move(from), std::move(to), cast_type, std::move(diagnostic)); } REGISTER_FUNCTION(CastOverloadResolvers) diff --git a/src/Functions/CastOverloadResolver.h b/src/Functions/CastOverloadResolver.h index 7d98f774812..66f9d6cfcaf 100644 --- a/src/Functions/CastOverloadResolver.h +++ b/src/Functions/CastOverloadResolver.h @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB @@ -11,6 +12,9 @@ namespace DB class IFunctionOverloadResolver; using FunctionOverloadResolverPtr = std::shared_ptr; +class IFunctionBase; +using FunctionBasePtr = std::shared_ptr; + enum class CastType : uint8_t { nonAccurate, @@ -24,6 +28,6 @@ struct CastDiagnostic std::string column_to; }; -FunctionOverloadResolverPtr createInternalCastOverloadResolver(CastType type, std::optional diagnostic); +FunctionBasePtr createInternalCast(ColumnWithTypeAndName from, DataTypePtr to, CastType cast_type, std::optional diagnostic); } diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index 6f0bdf406db..dfb4b76e5e2 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -1198,7 +1198,7 @@ struct ToYearImpl { if (point.getType() != Field::Types::UInt64) return std::nullopt; - auto year = point.get(); + auto year = point.safeGet(); if (year < DATE_LUT_MIN_YEAR || year >= DATE_LUT_MAX_YEAR) return std::nullopt; const DateLUTImpl & date_lut = DateLUT::instance("UTC"); @@ -2003,7 +2003,7 @@ struct ToYYYYMMImpl { if (point.getType() != Field::Types::UInt64) return std::nullopt; - auto year_month = point.get(); + auto year_month = point.safeGet(); auto year = year_month / 100; auto month = year_month % 100; diff --git a/src/Functions/FunctionSQLJSON.h b/src/Functions/FunctionSQLJSON.h index 83ed874c47b..4721f858f5c 100644 --- a/src/Functions/FunctionSQLJSON.h +++ b/src/Functions/FunctionSQLJSON.h @@ -123,7 +123,7 @@ public: class Executor { public: - static ColumnPtr run(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, uint32_t parse_depth, uint32_t parse_backtracks, const ContextPtr & context) + static ColumnPtr run(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, uint32_t parse_depth, uint32_t parse_backtracks, bool function_json_value_return_type_allow_complex) { MutableColumnPtr to{result_type->createColumn()}; to->reserve(input_rows_count); @@ -191,7 +191,7 @@ public: { /// Instead of creating a new generator for each row, we can reuse the same one. generator_json_path.reinitialize(); - added_to_column = impl.insertResultToColumn(*to, document, generator_json_path, context); + added_to_column = impl.insertResultToColumn(*to, document, generator_json_path, function_json_value_return_type_allow_complex); } if (!added_to_column) { @@ -204,11 +204,18 @@ public: }; template typename Impl> -class FunctionSQLJSON : public IFunction, WithConstContext +class FunctionSQLJSON : public IFunction { public: static FunctionPtr create(ContextPtr context_) { return std::make_shared(context_); } - explicit FunctionSQLJSON(ContextPtr context_) : WithConstContext(context_) { } + explicit FunctionSQLJSON(ContextPtr context_) + : max_parser_depth(context_->getSettingsRef().max_parser_depth), + max_parser_backtracks(context_->getSettingsRef().max_parser_backtracks), + allow_simdjson(context_->getSettingsRef().allow_simdjson), + function_json_value_return_type_allow_complex(context_->getSettingsRef().function_json_value_return_type_allow_complex), + function_json_value_return_type_allow_nullable(context_->getSettingsRef().function_json_value_return_type_allow_nullable) + { + } static constexpr auto name = Name::name; String getName() const override { return Name::name; } @@ -221,7 +228,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { return Impl>::getReturnType( - Name::name, arguments, getContext()); + Name::name, arguments, function_json_value_return_type_allow_nullable); } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override @@ -231,19 +238,25 @@ public: /// 2. Create ASTPtr /// 3. Parser(Tokens, ASTPtr) -> complete AST /// 4. Execute functions: call getNextItem on generator and handle each item - unsigned parse_depth = static_cast(getContext()->getSettingsRef().max_parser_depth); - unsigned parse_backtracks = static_cast(getContext()->getSettingsRef().max_parser_backtracks); + unsigned parse_depth = static_cast(max_parser_depth); + unsigned parse_backtracks = static_cast(max_parser_backtracks); #if USE_SIMDJSON - if (getContext()->getSettingsRef().allow_simdjson) + if (allow_simdjson) return FunctionSQLJSONHelpers::Executor< Name, Impl>, - SimdJSONParser>::run(arguments, result_type, input_rows_count, parse_depth, parse_backtracks, getContext()); + SimdJSONParser>::run(arguments, result_type, input_rows_count, parse_depth, parse_backtracks, function_json_value_return_type_allow_complex); #endif return FunctionSQLJSONHelpers:: Executor>, DummyJSONParser>::run( - arguments, result_type, input_rows_count, parse_depth, parse_backtracks, getContext()); + arguments, result_type, input_rows_count, parse_depth, parse_backtracks, function_json_value_return_type_allow_complex); } +private: + const size_t max_parser_depth; + const size_t max_parser_backtracks; + const bool allow_simdjson; + const bool function_json_value_return_type_allow_complex; + const bool function_json_value_return_type_allow_nullable; }; struct NameJSONExists @@ -267,11 +280,11 @@ class JSONExistsImpl public: using Element = typename JSONParser::Element; - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &, const ContextPtr &) { return std::make_shared(); } + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &, bool) { return std::make_shared(); } static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - static bool insertResultToColumn(IColumn & dest, const Element & root, GeneratorJSONPath & generator_json_path, const ContextPtr &) + static bool insertResultToColumn(IColumn & dest, const Element & root, GeneratorJSONPath & generator_json_path, bool) { Element current_element = root; VisitorStatus status; @@ -305,9 +318,9 @@ class JSONValueImpl public: using Element = typename JSONParser::Element; - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &, const ContextPtr & context) + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &, bool function_json_value_return_type_allow_nullable) { - if (context->getSettingsRef().function_json_value_return_type_allow_nullable) + if (function_json_value_return_type_allow_nullable) { DataTypePtr string_type = std::make_shared(); return std::make_shared(string_type); @@ -320,7 +333,7 @@ public: static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - static bool insertResultToColumn(IColumn & dest, const Element & root, GeneratorJSONPath & generator_json_path, const ContextPtr & context) + static bool insertResultToColumn(IColumn & dest, const Element & root, GeneratorJSONPath & generator_json_path, bool function_json_value_return_type_allow_complex) { Element current_element = root; VisitorStatus status; @@ -329,7 +342,7 @@ public: { if (status == VisitorStatus::Ok) { - if (context->getSettingsRef().function_json_value_return_type_allow_complex) + if (function_json_value_return_type_allow_complex) { break; } @@ -383,11 +396,11 @@ class JSONQueryImpl public: using Element = typename JSONParser::Element; - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &, const ContextPtr &) { return std::make_shared(); } + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &, bool) { return std::make_shared(); } static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - static bool insertResultToColumn(IColumn & dest, const Element & root, GeneratorJSONPath & generator_json_path, const ContextPtr &) + static bool insertResultToColumn(IColumn & dest, const Element & root, GeneratorJSONPath & generator_json_path, bool) { ColumnString & col_str = assert_cast(dest); diff --git a/src/Functions/FunctionsConsistentHashing.h b/src/Functions/FunctionsConsistentHashing.h index 306b6395dc5..210bb69e16d 100644 --- a/src/Functions/FunctionsConsistentHashing.h +++ b/src/Functions/FunctionsConsistentHashing.h @@ -101,9 +101,9 @@ private: BucketsType num_buckets; if (buckets_field.getType() == Field::Types::Int64) - num_buckets = checkBucketsRange(buckets_field.get()); + num_buckets = checkBucketsRange(buckets_field.safeGet()); else if (buckets_field.getType() == Field::Types::UInt64) - num_buckets = checkBucketsRange(buckets_field.get()); + num_buckets = checkBucketsRange(buckets_field.safeGet()); else throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of the second argument of function {}", diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 675283d011e..0f3ffbffdff 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -35,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -43,8 +45,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -117,7 +121,7 @@ UInt32 extractToDecimalScale(const ColumnWithTypeAndName & named_column) Field field; named_column.column->get(0, field); - return static_cast(field.get()); + return static_cast(field.safeGet()); } @@ -1573,6 +1577,35 @@ struct ConvertImpl arguments, result_type, input_rows_count, additions); } } + else if constexpr (std::is_same_v && std::is_same_v) + { + IntervalKind to = typeid_cast(result_type.get())->getKind(); + IntervalKind from = typeid_cast(arguments[0].type.get())->getKind(); + + if (from == to || arguments[0].column->empty()) + return arguments[0].column; + + Int64 conversion_factor = 1; + Int64 result_value; + + int from_position = static_cast(from.kind); + int to_position = static_cast(to.kind); /// Positions of each interval according to granularity map + + if (from_position < to_position) + { + for (int i = from_position; i < to_position; ++i) + conversion_factor *= interval_conversions[i]; + result_value = arguments[0].column->getInt(0) / conversion_factor; + } + else + { + for (int i = from_position; i > to_position; --i) + conversion_factor *= interval_conversions[i]; + result_value = arguments[0].column->getInt(0) * conversion_factor; + } + + return ColumnConst::create(ColumnInt64::create(1, result_value), input_rows_count); + } else { using FromFieldType = typename FromDataType::FieldType; @@ -2181,7 +2214,7 @@ private: const DataTypePtr from_type = removeNullable(arguments[0].type); ColumnPtr result_column; - [[maybe_unused]] FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior = default_date_time_overflow_behavior; + FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior = default_date_time_overflow_behavior; if (context) date_time_overflow_behavior = context->getSettingsRef().date_time_overflow_behavior.value; @@ -2277,7 +2310,7 @@ private: } } else - result_column = ConvertImpl::execute(arguments, result_type, input_rows_count, from_string_tag); + result_column = ConvertImpl::execute(arguments, result_type, input_rows_count, from_string_tag); return true; }; @@ -2334,6 +2367,10 @@ private: else done = callOnIndexAndDataType(from_type->getTypeId(), call, BehaviourOnErrorFromString::ConvertDefaultBehaviorTag); } + + if constexpr (std::is_same_v) + if (WhichDataType(from_type).isInterval()) + done = callOnIndexAndDataType(from_type->getTypeId(), call, BehaviourOnErrorFromString::ConvertDefaultBehaviorTag); } if (!done) @@ -2604,8 +2641,8 @@ struct ToNumberMonotonicity if (left.isNull() || right.isNull()) return {}; - Float64 left_float = left.get(); - Float64 right_float = right.get(); + Float64 left_float = left.safeGet(); + Float64 right_float = right.safeGet(); if (left_float >= static_cast(std::numeric_limits::min()) && left_float <= static_cast(std::numeric_limits::max()) @@ -2633,11 +2670,11 @@ struct ToNumberMonotonicity const bool left_in_first_half = left.isNull() ? from_is_unsigned - : (left.get() >= 0); + : (left.safeGet() >= 0); const bool right_in_first_half = right.isNull() ? !from_is_unsigned - : (right.get() >= 0); + : (right.safeGet() >= 0); /// Size of type is the same. if (size_of_from == size_of_to) @@ -2675,7 +2712,7 @@ struct ToNumberMonotonicity return {}; /// Function cannot be monotonic when left and right are not on the same ranges. - if (divideByRangeOfType(left.get()) != divideByRangeOfType(right.get())) + if (divideByRangeOfType(left.safeGet()) != divideByRangeOfType(right.safeGet())) return {}; if (to_is_unsigned) @@ -2683,7 +2720,7 @@ struct ToNumberMonotonicity else { // If To is signed, it's possible that the signedness is different after conversion. So we check it explicitly. - const bool is_monotonic = (T(left.get()) >= 0) == (T(right.get()) >= 0); + const bool is_monotonic = (T(left.safeGet()) >= 0) == (T(right.safeGet()) >= 0); return { .is_monotonic = is_monotonic }; } @@ -2707,13 +2744,13 @@ struct ToDateMonotonicity } else if ( ((left.getType() == Field::Types::UInt64 || left.isNull()) && (right.getType() == Field::Types::UInt64 || right.isNull()) - && ((left.isNull() || left.get() < 0xFFFF) && (right.isNull() || right.get() >= 0xFFFF))) + && ((left.isNull() || left.safeGet() < 0xFFFF) && (right.isNull() || right.safeGet() >= 0xFFFF))) || ((left.getType() == Field::Types::Int64 || left.isNull()) && (right.getType() == Field::Types::Int64 || right.isNull()) - && ((left.isNull() || left.get() < 0xFFFF) && (right.isNull() || right.get() >= 0xFFFF))) + && ((left.isNull() || left.safeGet() < 0xFFFF) && (right.isNull() || right.safeGet() >= 0xFFFF))) || (( (left.getType() == Field::Types::Float64 || left.isNull()) && (right.getType() == Field::Types::Float64 || right.isNull()) - && ((left.isNull() || left.get() < 0xFFFF) && (right.isNull() || right.get() >= 0xFFFF)))) + && ((left.isNull() || left.safeGet() < 0xFFFF) && (right.isNull() || right.safeGet() >= 0xFFFF)))) || !isNativeNumber(type)) { return {}; @@ -2768,16 +2805,16 @@ struct ToStringMonotonicity if (left.getType() == Field::Types::UInt64 && right.getType() == Field::Types::UInt64) { - return (left.get() == 0 && right.get() == 0) - || (floor(log10(left.get())) == floor(log10(right.get()))) + return (left.safeGet() == 0 && right.safeGet() == 0) + || (floor(log10(left.safeGet())) == floor(log10(right.safeGet()))) ? positive : not_monotonic; } if (left.getType() == Field::Types::Int64 && right.getType() == Field::Types::Int64) { - return (left.get() == 0 && right.get() == 0) - || (left.get() > 0 && right.get() > 0 && floor(log10(left.get())) == floor(log10(right.get()))) + return (left.safeGet() == 0 && right.safeGet() == 0) + || (left.safeGet() > 0 && right.safeGet() > 0 && floor(log10(left.safeGet())) == floor(log10(right.safeGet()))) ? positive : not_monotonic; } @@ -3879,7 +3916,7 @@ private: "Expected tuple with {} subcolumn, but got {} subcolumns", tuple_size, column_tuple.getColumns().size()); - auto res = ColumnObject::create(has_nullable_subcolumns); + auto res = ColumnObjectDeprecated::create(has_nullable_subcolumns); for (size_t i = 0; i < tuple_size; ++i) { ColumnsWithTypeAndName element = {{column_tuple.getColumns()[i], from_types[i], "" }}; @@ -3956,7 +3993,7 @@ private: subcolumn->insertDefault(); } - auto column_object = ColumnObject::create(has_nullable_subcolumns); + auto column_object = ColumnObjectDeprecated::create(has_nullable_subcolumns); for (auto && [key, subcolumn] : subcolumns) { PathInData path(key.toView()); @@ -3967,7 +4004,7 @@ private: }; } - WrapperType createObjectWrapper(const DataTypePtr & from_type, const DataTypeObject * to_type) const + WrapperType createObjectDeprecatedWrapper(const DataTypePtr & from_type, const DataTypeObjectDeprecated * to_type) const { if (const auto * from_tuple = checkAndGetDataType(from_type.get())) { @@ -3986,12 +4023,12 @@ private: return res; }; } - else if (checkAndGetDataType(from_type.get())) + else if (checkAndGetDataType(from_type.get())) { return [is_nullable = to_type->hasNullableSubcolumns()] (ColumnsWithTypeAndName & arguments, const DataTypePtr & , const ColumnNullable * , size_t) -> ColumnPtr { - const auto & column_object = assert_cast(*arguments.front().column); - auto res = ColumnObject::create(is_nullable); + const auto & column_object = assert_cast(*arguments.front().column); + auto res = ColumnObjectDeprecated::create(is_nullable); for (size_t i = 0; i < column_object.size(); i++) res->insert(column_object[i]); @@ -4004,6 +4041,25 @@ private: "Cast to Object can be performed only from flatten named Tuple, Map or String. Got: {}", from_type->getName()); } + WrapperType createObjectWrapper(const DataTypePtr & from_type, const DataTypeObject * to_object) const + { + if (checkAndGetDataType(from_type.get())) + { + return [this](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * nullable_source, size_t input_rows_count) + { + auto res = ConvertImplGenericFromString::execute(arguments, result_type, nullable_source, input_rows_count, context)->assumeMutable(); + res->finalize(); + return res; + }; + } + + /// TODO: support CAST between JSON types with different parameters + /// support CAST from Map to JSON + /// support CAST from Tuple to JSON + /// support CAST from Object('json') to JSON + throw Exception(ErrorCodes::TYPE_MISMATCH, "Cast to {} can be performed only from String. Got: {}", magic_enum::enum_name(to_object->getSchemaFormat()), from_type->getName()); + } + WrapperType createVariantToVariantWrapper(const DataTypeVariant & from_variant, const DataTypeVariant & to_variant) const { /// We support only extension of variant type, so, only new types can be added. @@ -4287,13 +4343,98 @@ private: WrapperType createDynamicToColumnWrapper(const DataTypePtr &) const { return [this] - (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * col_nullable, size_t input_rows_count) -> ColumnPtr + (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr { + /// When casting Dynamic to regular column we should cast all variants from current Dynamic column + /// and construct the result based on discriminators. const auto & column_dynamic = assert_cast(*arguments.front().column.get()); + const auto & variant_column = column_dynamic.getVariantColumn(); const auto & variant_info = column_dynamic.getVariantInfo(); - auto variant_wrapper = createVariantToColumnWrapper(assert_cast(*variant_info.variant_type), result_type); - ColumnsWithTypeAndName args = {ColumnWithTypeAndName(column_dynamic.getVariantColumnPtr(), variant_info.variant_type, "")}; - return variant_wrapper(args, result_type, col_nullable, input_rows_count); + + /// First, cast usual variants to result type. + const auto & variant_types = assert_cast(*variant_info.variant_type).getVariants(); + std::vector casted_variant_columns; + casted_variant_columns.reserve(variant_types.size()); + for (size_t i = 0; i != variant_types.size(); ++i) + { + const auto & variant_col = variant_column.getVariantPtrByGlobalDiscriminator(i); + ColumnsWithTypeAndName variant = {{variant_col, variant_types[i], ""}}; + auto variant_wrapper = prepareUnpackDictionaries(variant_types[i], result_type); + casted_variant_columns.push_back(variant_wrapper(variant, result_type, nullptr, variant_col->size())); + } + + /// Second, collect all variants stored in shared variant and cast them to result type. + std::vector variant_columns_from_shared_variant; + DataTypes variant_types_from_shared_variant; + /// We will need to know what variant to use when we see discriminator of a shared variant. + /// To do it, we remember what variant was extracted from each row and what was it's offset. + PaddedPODArray shared_variant_indexes; + PaddedPODArray shared_variant_offsets; + std::unordered_map shared_variant_to_index; + const auto & shared_variant = column_dynamic.getSharedVariant(); + const auto shared_variant_discr = column_dynamic.getSharedVariantDiscriminator(); + const auto & local_discriminators = variant_column.getLocalDiscriminators(); + const auto & offsets = variant_column.getOffsets(); + if (!shared_variant.empty()) + { + shared_variant_indexes.reserve(input_rows_count); + shared_variant_offsets.reserve(input_rows_count); + FormatSettings format_settings; + const auto shared_variant_local_discr = variant_column.localDiscriminatorByGlobal(shared_variant_discr); + for (size_t i = 0; i != input_rows_count; ++i) + { + if (local_discriminators[i] == shared_variant_local_discr) + { + auto value = shared_variant.getDataAt(offsets[i]); + ReadBufferFromMemory buf(value.data, value.size); + auto type = decodeDataType(buf); + auto type_name = type->getName(); + auto it = shared_variant_to_index.find(type_name); + /// Check if we didn't create column for this variant yet. + if (it == shared_variant_to_index.end()) + { + it = shared_variant_to_index.emplace(type_name, variant_columns_from_shared_variant.size()).first; + variant_columns_from_shared_variant.push_back(type->createColumn()); + variant_types_from_shared_variant.push_back(type); + } + + shared_variant_indexes.push_back(it->second); + shared_variant_offsets.push_back(variant_columns_from_shared_variant[it->second]->size()); + type->getDefaultSerialization()->deserializeBinary(*variant_columns_from_shared_variant[it->second], buf, format_settings); + } + else + { + shared_variant_indexes.emplace_back(); + shared_variant_offsets.emplace_back(); + } + } + } + + /// Cast all extracted variants into result type. + std::vector casted_shared_variant_columns; + casted_shared_variant_columns.reserve(variant_types_from_shared_variant.size()); + for (size_t i = 0; i != variant_types_from_shared_variant.size(); ++i) + { + ColumnsWithTypeAndName variant = {{variant_columns_from_shared_variant[i]->getPtr(), variant_types_from_shared_variant[i], ""}}; + auto variant_wrapper = prepareUnpackDictionaries(variant_types_from_shared_variant[i], result_type); + casted_shared_variant_columns.push_back(variant_wrapper(variant, result_type, nullptr, variant_columns_from_shared_variant[i]->size())); + } + + /// Construct result column from all casted variants. + auto res = result_type->createColumn(); + res->reserve(input_rows_count); + for (size_t i = 0; i != input_rows_count; ++i) + { + auto global_discr = variant_column.globalDiscriminatorByLocal(local_discriminators[i]); + if (global_discr == ColumnVariant::NULL_DISCRIMINATOR) + res->insertDefault(); + else if (global_discr == shared_variant_discr) + res->insertFrom(*casted_shared_variant_columns[shared_variant_indexes[i]], shared_variant_offsets[i]); + else + res->insertFrom(*casted_variant_columns[global_discr], offsets[i]); + } + + return res; }; } @@ -4320,200 +4461,51 @@ private: }; } - std::pair getReducedVariant( - const ColumnVariant & variant_column, - const DataTypePtr & variant_type, - const std::unordered_map & variant_name_to_discriminator, - size_t max_result_num_variants, - const ColumnDynamic::Statistics & statistics = {}) const + WrapperType createVariantToDynamicWrapper(const DataTypeVariant & from_variant_type, const DataTypeDynamic & dynamic_type) const { - const auto & variant_types = assert_cast(*variant_type).getVariants(); - /// First check if we don't exceed the limit in current Variant column. - if (variant_types.size() < max_result_num_variants || (variant_types.size() == max_result_num_variants && variant_name_to_discriminator.contains("String"))) - return {variant_column.getPtr(), variant_type}; - - /// We want to keep the most frequent variants and convert to string the rarest. - std::vector> variant_sizes; - variant_sizes.reserve(variant_types.size()); - std::optional old_string_discriminator; - /// List of variants that should be converted to a single String variant. - std::vector variants_to_convert_to_string; - for (size_t i = 0; i != variant_types.size(); ++i) + /// First create extended Variant with shared variant type and cast this Variant to it. + auto variants_for_dynamic = from_variant_type.getVariants(); + size_t number_of_variants = variants_for_dynamic.size(); + variants_for_dynamic.push_back(ColumnDynamic::getSharedVariantDataType()); + const auto & variant_type_for_dynamic = std::make_shared(variants_for_dynamic); + auto old_to_new_variant_wrapper = createVariantToVariantWrapper(from_variant_type, *variant_type_for_dynamic); + auto max_dynamic_types = dynamic_type.getMaxDynamicTypes(); + return [old_to_new_variant_wrapper, variant_type_for_dynamic, number_of_variants, max_dynamic_types] + (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * col_nullable, size_t input_rows_count) -> ColumnPtr { - /// String variant won't be removed. - String variant_name = variant_types[i]->getName(); + auto variant_column_for_dynamic = old_to_new_variant_wrapper(arguments, result_type, col_nullable, input_rows_count); + /// If resulting Dynamic column can contain all variants from this Variant column, just create Dynamic column from it. + if (max_dynamic_types >= number_of_variants) + return ColumnDynamic::create(variant_column_for_dynamic, variant_type_for_dynamic, max_dynamic_types, max_dynamic_types); - if (variant_name == "String") - { - old_string_discriminator = i; - /// For simplicity, add this variant to the list that will be converted to string, - /// so we will process it with other variants when constructing the new String variant. - variants_to_convert_to_string.push_back(i); - } - else - { - size_t size = 0; - if (statistics.data.empty()) - size = variant_column.getVariantByGlobalDiscriminator(i).size(); - else - size = statistics.data.at(variant_name); - variant_sizes.emplace_back(size, i); - } - } - - /// Sort variants by sizes, so we will keep the most frequent. - std::sort(variant_sizes.begin(), variant_sizes.end(), std::greater()); - - DataTypes remaining_variants; - remaining_variants.reserve(max_result_num_variants); - /// Add String variant in advance. - remaining_variants.push_back(std::make_shared()); - for (auto [_, discr] : variant_sizes) - { - if (remaining_variants.size() != max_result_num_variants) - remaining_variants.push_back(variant_types[discr]); - else - variants_to_convert_to_string.push_back(discr); - } - - auto reduced_variant = std::make_shared(remaining_variants); - const auto & new_variants = reduced_variant->getVariants(); - /// To construct reduced variant column we will need mapping from old to new discriminators. - std::vector old_to_new_discriminators_mapping; - old_to_new_discriminators_mapping.resize(variant_types.size()); - ColumnVariant::Discriminator string_variant_discriminator = 0; - for (size_t i = 0; i != new_variants.size(); ++i) - { - String variant_name = new_variants[i]->getName(); - if (variant_name == "String") - { - string_variant_discriminator = i; - for (auto discr : variants_to_convert_to_string) - old_to_new_discriminators_mapping[discr] = i; - } - else - { - auto old_discr = variant_name_to_discriminator.at(variant_name); - old_to_new_discriminators_mapping[old_discr] = i; - } - } - - /// Convert all reduced variants to String. - std::unordered_map variants_converted_to_string; - variants_converted_to_string.reserve(variants_to_convert_to_string.size()); - size_t string_variant_size = 0; - for (auto discr : variants_to_convert_to_string) - { - auto string_type = std::make_shared(); - auto string_wrapper = prepareUnpackDictionaries(variant_types[discr], string_type); - auto column_to_convert = ColumnWithTypeAndName(variant_column.getVariantPtrByGlobalDiscriminator(discr), variant_types[discr], ""); - ColumnsWithTypeAndName args = {column_to_convert}; - auto variant_string_column = string_wrapper(args, string_type, nullptr, column_to_convert.column->size()); - string_variant_size += variant_string_column->size(); - variants_converted_to_string[discr] = variant_string_column; - } - - /// Create new discriminators and offsets and fill new String variant according to old discriminators. - auto string_variant = ColumnString::create(); - string_variant->reserve(string_variant_size); - auto new_discriminators_column = variant_column.getLocalDiscriminatorsPtr()->cloneEmpty(); - auto & new_discriminators_data = assert_cast(*new_discriminators_column).getData(); - new_discriminators_data.reserve(variant_column.size()); - auto new_offsets = variant_column.getOffsetsPtr()->cloneEmpty(); - auto & new_offsets_data = assert_cast(*new_offsets).getData(); - new_offsets_data.reserve(variant_column.size()); - const auto & old_local_discriminators = variant_column.getLocalDiscriminators(); - const auto & old_offsets = variant_column.getOffsets(); - for (size_t i = 0; i != old_local_discriminators.size(); ++i) - { - auto old_discr = variant_column.globalDiscriminatorByLocal(old_local_discriminators[i]); - - if (old_discr == ColumnVariant::NULL_DISCRIMINATOR) - { - new_discriminators_data.push_back(ColumnVariant::NULL_DISCRIMINATOR); - new_offsets_data.push_back(0); - continue; - } - - auto new_discr = old_to_new_discriminators_mapping[old_discr]; - new_discriminators_data.push_back(new_discr); - if (new_discr != string_variant_discriminator) - { - new_offsets_data.push_back(old_offsets[i]); - } - else - { - new_offsets_data.push_back(string_variant->size()); - string_variant->insertFrom(*variants_converted_to_string[old_discr], old_offsets[i]); - } - } - - /// Create new list of variant columns. - Columns new_variant_columns; - new_variant_columns.resize(new_variants.size()); - for (size_t i = 0; i != variant_types.size(); ++i) - { - auto new_discr = old_to_new_discriminators_mapping[i]; - if (new_discr != string_variant_discriminator) - new_variant_columns[new_discr] = variant_column.getVariantPtrByGlobalDiscriminator(i); - } - new_variant_columns[string_variant_discriminator] = std::move(string_variant); - return {ColumnVariant::create(std::move(new_discriminators_column), std::move(new_offsets), new_variant_columns), reduced_variant}; - } - - WrapperType createVariantToDynamicWrapper(const DataTypePtr & from_type, const DataTypeDynamic & dynamic_type) const - { - const auto & from_variant_type = assert_cast(*from_type); - size_t max_dynamic_types = dynamic_type.getMaxDynamicTypes(); - const auto & variants = from_variant_type.getVariants(); - std::unordered_map variant_name_to_discriminator; - variant_name_to_discriminator.reserve(variants.size()); - for (size_t i = 0; i != variants.size(); ++i) - variant_name_to_discriminator[variants[i]->getName()] = i; - - return [from_type, max_dynamic_types, variant_name_to_discriminator, this] - (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t) -> ColumnPtr - { - const auto & variant_column = assert_cast(*arguments.front().column); - auto [reduced_variant_column, reduced_variant_type] = getReducedVariant(variant_column, from_type, variant_name_to_discriminator, max_dynamic_types); - return ColumnDynamic::create(reduced_variant_column, reduced_variant_type, max_dynamic_types); + /// Otherwise some variants should go to the shared variant. Create temporary Dynamic column from this Variant and insert + /// all data to the resulting Dynamic column, this insertion will do all the logic with shared variant. + auto tmp_dynamic_column = ColumnDynamic::create(variant_column_for_dynamic, variant_type_for_dynamic, number_of_variants, number_of_variants); + auto result_dynamic_column = ColumnDynamic::create(max_dynamic_types); + result_dynamic_column->insertRangeFrom(*tmp_dynamic_column, 0, tmp_dynamic_column->size()); + return result_dynamic_column; }; } WrapperType createColumnToDynamicWrapper(const DataTypePtr & from_type, const DataTypeDynamic & dynamic_type) const { if (const auto * variant_type = typeid_cast(from_type.get())) - return createVariantToDynamicWrapper(from_type, dynamic_type); - - if (dynamic_type.getMaxDynamicTypes() == 1) - { - DataTypePtr string_type = std::make_shared(); - if (from_type->isNullable()) - string_type = makeNullable(string_type); - auto string_wrapper = prepareUnpackDictionaries(from_type, string_type); - auto variant_type = std::make_shared(DataTypes{removeNullable(string_type)}); - auto variant_wrapper = createColumnToVariantWrapper(string_type, *variant_type); - return [string_wrapper, variant_wrapper, string_type, variant_type, max_dynamic_types=dynamic_type.getMaxDynamicTypes()] - (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * col_nullable, size_t input_rows_count) -> ColumnPtr - { - auto string_column = string_wrapper(arguments, string_type, col_nullable, input_rows_count); - auto column = ColumnWithTypeAndName(string_column, string_type, ""); - ColumnsWithTypeAndName args = {column}; - auto variant_column = variant_wrapper(args, variant_type, nullptr, string_column->size()); - return ColumnDynamic::create(variant_column, variant_type, max_dynamic_types); - }; - } + return createVariantToDynamicWrapper(*variant_type, dynamic_type); if (context && context->getSettingsRef().cast_string_to_dynamic_use_inference && isStringOrFixedString(removeNullable(removeLowCardinality(from_type)))) return createStringToDynamicThroughParsingWrapper(); + /// First, cast column to Variant with 2 variants - the type of the column we cast and shared variant type. auto variant_type = std::make_shared(DataTypes{removeNullableOrLowCardinalityNullable(from_type)}); - auto variant_wrapper = createColumnToVariantWrapper(from_type, *variant_type); - return [variant_wrapper, variant_type, max_dynamic_types=dynamic_type.getMaxDynamicTypes()] - (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * col_nullable, size_t input_rows_count) -> ColumnPtr + auto column_to_variant_wrapper = createColumnToVariantWrapper(from_type, *variant_type); + /// Second, cast this Variant to Dynamic. + auto variant_to_dynamic_wrapper = createVariantToDynamicWrapper(*variant_type, dynamic_type); + return [column_to_variant_wrapper, variant_to_dynamic_wrapper, variant_type] + (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * col_nullable, size_t input_rows_count) -> ColumnPtr { - auto variant_res = variant_wrapper(arguments, variant_type, col_nullable, input_rows_count); - return ColumnDynamic::create(variant_res, variant_type, max_dynamic_types); + auto variant_res = column_to_variant_wrapper(arguments, variant_type, col_nullable, input_rows_count); + ColumnsWithTypeAndName args = {{variant_res, variant_type, ""}}; + return variant_to_dynamic_wrapper(args, result_type, nullptr, input_rows_count); }; } @@ -4530,21 +4522,26 @@ private: (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t) -> ColumnPtr { const auto & column_dynamic = assert_cast(*arguments[0].column); - return ColumnDynamic::create(column_dynamic.getVariantColumnPtr(), column_dynamic.getVariantInfo(), to_max_types); + /// We should use the same limit as already used in column and change only global limit. + /// It's needed because shared variant should contain values only when limit is exceeded, + /// so if there are already some data, we cannot increase the limit. + return ColumnDynamic::create(column_dynamic.getVariantColumnPtr(), column_dynamic.getVariantInfo(), column_dynamic.getMaxDynamicTypes(), to_max_types); }; } - return [to_max_types, this] + return [to_max_types] (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t) -> ColumnPtr { const auto & column_dynamic = assert_cast(*arguments[0].column); - auto [reduced_variant_column, reduced_variant_type] = getReducedVariant( - column_dynamic.getVariantColumn(), - column_dynamic.getVariantInfo().variant_type, - column_dynamic.getVariantInfo().variant_name_to_discriminator, - to_max_types, - column_dynamic.getStatistics()); - return ColumnDynamic::create(reduced_variant_column, reduced_variant_type, to_max_types); + /// If real limit in the column is not greater than desired, just use the same variant column. + if (column_dynamic.getMaxDynamicTypes() <= to_max_types) + return ColumnDynamic::create(column_dynamic.getVariantColumnPtr(), column_dynamic.getVariantInfo(), column_dynamic.getMaxDynamicTypes(), to_max_types); + + /// Otherwise some variants should go to the shared variant. In this case we can just insert all + /// the data into resulting column and it will do all the logic with shared variant. + auto result_dynamic_column = ColumnDynamic::create(to_max_types); + result_dynamic_column->insertRangeFrom(column_dynamic, 0, column_dynamic.size()); + return result_dynamic_column; }; } @@ -4673,7 +4670,7 @@ private: return [function_name] ( ColumnsWithTypeAndName & arguments, const DataTypePtr & res_type, const ColumnNullable * nullable_col, size_t /*input_rows_count*/) { - using ColumnEnumType = EnumType::ColumnType; + using ColumnEnumType = typename EnumType::ColumnType; const auto & first_col = arguments.front().column.get(); const auto & first_type = arguments.front().type.get(); @@ -5137,6 +5134,8 @@ private: return createTupleWrapper(from_type, checkAndGetDataType(to_type.get())); case TypeIndex::Map: return createMapWrapper(from_type, checkAndGetDataType(to_type.get())); + case TypeIndex::ObjectDeprecated: + return createObjectDeprecatedWrapper(from_type, checkAndGetDataType(to_type.get())); case TypeIndex::Object: return createObjectWrapper(from_type, checkAndGetDataType(to_type.get())); case TypeIndex::AggregateFunction: diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 95c54ac9528..3da0b2cd9be 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -77,64 +77,70 @@ namespace impl ColumnPtr key0; ColumnPtr key1; bool is_const; - const ColumnArray::Offsets * offsets{}; + const ColumnArray::Offsets * offsets = nullptr; size_t size() const { assert(key0 && key1); assert(key0->size() == key1->size()); - assert(offsets == nullptr || offsets->size() == key0->size()); - if (offsets != nullptr) + if (offsets != nullptr && !offsets->empty()) return offsets->back(); return key0->size(); } + SipHashKey getKey(size_t i) const { if (is_const) i = 0; - if (offsets != nullptr) + assert(key0->size() == key1->size()); + if (offsets != nullptr && i > 0) { - const auto *const begin = offsets->begin(); + const auto * const begin = std::upper_bound(offsets->begin(), offsets->end(), i - 1); const auto * upper = std::upper_bound(begin, offsets->end(), i); - if (upper == offsets->end()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "offset {} not found in function SipHashKeyColumns::getKey", i); - i = upper - begin; + if (upper != offsets->end()) + i = upper - begin; } const auto & key0data = assert_cast(*key0).getData(); const auto & key1data = assert_cast(*key1).getData(); + assert(key0->size() > i); return {key0data[i], key1data[i]}; } }; static SipHashKeyColumns parseSipHashKeyColumns(const ColumnWithTypeAndName & key) { - const ColumnTuple * tuple = nullptr; - const auto * column = key.column.get(); - bool is_const = false; - if (isColumnConst(*column)) + const auto * col_key = key.column.get(); + + bool is_const; + const ColumnTuple * col_key_tuple; + if (isColumnConst(*col_key)) { is_const = true; - tuple = checkAndGetColumnConstData(column); + col_key_tuple = checkAndGetColumnConstData(col_key); } else - tuple = checkAndGetColumn(column); - if (!tuple) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "key must be a tuple"); - if (tuple->tupleSize() != 2) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "wrong tuple size: key must be a tuple of 2 UInt64"); + { + is_const = false; + col_key_tuple = checkAndGetColumn(col_key); + } - SipHashKeyColumns ret{tuple->getColumnPtr(0), tuple->getColumnPtr(1), is_const}; - assert(ret.key0); - if (!checkColumn(*ret.key0)) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "first element of the key tuple is not UInt64"); - assert(ret.key1); - if (!checkColumn(*ret.key1)) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "second element of the key tuple is not UInt64"); + if (!col_key_tuple || col_key_tuple->tupleSize() != 2) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The key must be of type Tuple(UInt64, UInt64)"); - if (ret.size() == 1) - ret.is_const = true; + SipHashKeyColumns result{.key0 = col_key_tuple->getColumnPtr(0), .key1 = col_key_tuple->getColumnPtr(1), .is_const = is_const}; - return ret; + assert(result.key0); + assert(result.key1); + + if (!checkColumn(*result.key0)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The 1st element of the key tuple is not of type UInt64"); + if (!checkColumn(*result.key1)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The 2nd element of the key tuple is not of type UInt64"); + + if (result.size() == 1) + result.is_const = true; + + return result; } } diff --git a/src/Functions/FunctionsJSON.cpp b/src/Functions/FunctionsJSON.cpp index c35df8ba72d..e6892642d56 100644 --- a/src/Functions/FunctionsJSON.cpp +++ b/src/Functions/FunctionsJSON.cpp @@ -243,7 +243,7 @@ private: } case MoveType::Index: { - Int64 index = (*arguments[j + 1].column)[row].get(); + Int64 index = (*arguments[j + 1].column)[row].safeGet(); if (!moveToElementByIndex(res_element, static_cast(index), key)) return false; break; @@ -739,7 +739,7 @@ public: { NumberType value; - if (!tryGetNumericValueFromJSONElement(value, element, convert_bool_to_integer, error)) + if (!tryGetNumericValueFromJSONElement(value, element, convert_bool_to_integer, /*allow_type_conversion=*/true, error)) return false; auto & col_vec = assert_cast &>(dest); col_vec.insertValue(value); diff --git a/src/Functions/FunctionsLogical.cpp b/src/Functions/FunctionsLogical.cpp index 65d7473b945..ff0cff09c9e 100644 --- a/src/Functions/FunctionsLogical.cpp +++ b/src/Functions/FunctionsLogical.cpp @@ -701,11 +701,11 @@ ColumnPtr FunctionAnyArityLogical::getConstantResultForNonConstArgum bool constant_value_bool = false; if (field_type == Field::Types::Float64) - constant_value_bool = static_cast(constant_field_value.get()); + constant_value_bool = static_cast(constant_field_value.safeGet()); else if (field_type == Field::Types::Int64) - constant_value_bool = static_cast(constant_field_value.get()); + constant_value_bool = static_cast(constant_field_value.safeGet()); else if (field_type == Field::Types::UInt64) - constant_value_bool = static_cast(constant_field_value.get()); + constant_value_bool = static_cast(constant_field_value.safeGet()); has_true_constant = has_true_constant || constant_value_bool; has_false_constant = has_false_constant || !constant_value_bool; diff --git a/src/Functions/FunctionsRound.h b/src/Functions/FunctionsRound.h index 6b65a5feaec..ed7fe1a5de1 100644 --- a/src/Functions/FunctionsRound.h +++ b/src/Functions/FunctionsRound.h @@ -500,7 +500,7 @@ inline Scale getScaleArg(const ColumnConst* scale_col) { const auto & scale_field = scale_col->getField(); - Int64 scale64 = scale_field.get(); + Int64 scale64 = scale_field.safeGet(); validateScale(scale64); return scale64; @@ -632,7 +632,7 @@ public: Scale raw_scale = scale64; DecimalRoundingImpl::applyOne(value_col_typed->getElement(i), value_col_typed->getScale(), - reinterpret_cast::NativeT&>(col_res->getElement(i)), raw_scale); + reinterpret_cast::NativeT&>(col_res->getElement(i)), raw_scale); } } } @@ -854,7 +854,7 @@ private: using ValueType = typename Container::value_type; std::vector boundary_values(boundaries.size()); for (size_t i = 0; i < boundaries.size(); ++i) - boundary_values[i] = static_cast(boundaries[i].get()); + boundary_values[i] = static_cast(boundaries[i].safeGet()); ::sort(boundary_values.begin(), boundary_values.end()); boundary_values.erase(std::unique(boundary_values.begin(), boundary_values.end()), boundary_values.end()); diff --git a/src/Functions/FunctionsTimeWindow.cpp b/src/Functions/FunctionsTimeWindow.cpp index 77d740803be..88b85c48326 100644 --- a/src/Functions/FunctionsTimeWindow.cpp +++ b/src/Functions/FunctionsTimeWindow.cpp @@ -267,7 +267,12 @@ struct TimeWindowImpl { auto type = WhichDataType(arguments[0].type); if (type.isTuple()) - return std::static_pointer_cast(arguments[0].type)->getElement(0); + { + const auto & tuple_elems = std::static_pointer_cast(arguments[0].type)->getElements(); + if (tuple_elems.empty()) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Tuple passed to {} should not be empty", function_name); + return tuple_elems[0]; + } else if (type.isUInt32()) return std::make_shared(); else @@ -622,7 +627,12 @@ struct TimeWindowImpl { auto type = WhichDataType(arguments[0].type); if (type.isTuple()) - return std::static_pointer_cast(arguments[0].type)->getElement(0); + { + const auto & tuple_elems = std::static_pointer_cast(arguments[0].type)->getElements(); + if (tuple_elems.empty()) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Tuple passed to {} should not be empty", function_name); + return tuple_elems[0]; + } else if (type.isUInt32()) return std::make_shared(); else diff --git a/src/Functions/IFunctionCustomWeek.h b/src/Functions/IFunctionCustomWeek.h index 51542c9cab1..ba0baa35819 100644 --- a/src/Functions/IFunctionCustomWeek.h +++ b/src/Functions/IFunctionCustomWeek.h @@ -50,15 +50,15 @@ public: if (checkAndGetDataType(&type)) { - return Transform::FactorTransform::execute(UInt16(left.get()), date_lut) - == Transform::FactorTransform::execute(UInt16(right.get()), date_lut) + return Transform::FactorTransform::execute(UInt16(left.safeGet()), date_lut) + == Transform::FactorTransform::execute(UInt16(right.safeGet()), date_lut) ? is_monotonic : is_not_monotonic; } else { - return Transform::FactorTransform::execute(UInt32(left.get()), date_lut) - == Transform::FactorTransform::execute(UInt32(right.get()), date_lut) + return Transform::FactorTransform::execute(UInt32(left.safeGet()), date_lut) + == Transform::FactorTransform::execute(UInt32(right.safeGet()), date_lut) ? is_monotonic : is_not_monotonic; } diff --git a/src/Functions/IFunctionDateOrDateTime.h b/src/Functions/IFunctionDateOrDateTime.h index 762b79bfafc..899aa2c305d 100644 --- a/src/Functions/IFunctionDateOrDateTime.h +++ b/src/Functions/IFunctionDateOrDateTime.h @@ -72,30 +72,30 @@ public: if (checkAndGetDataType(type_ptr)) { - return Transform::FactorTransform::execute(UInt16(left.get()), *date_lut) - == Transform::FactorTransform::execute(UInt16(right.get()), *date_lut) + return Transform::FactorTransform::execute(UInt16(left.safeGet()), *date_lut) + == Transform::FactorTransform::execute(UInt16(right.safeGet()), *date_lut) ? is_monotonic : is_not_monotonic; } else if (checkAndGetDataType(type_ptr)) { - return Transform::FactorTransform::execute(Int32(left.get()), *date_lut) - == Transform::FactorTransform::execute(Int32(right.get()), *date_lut) + return Transform::FactorTransform::execute(Int32(left.safeGet()), *date_lut) + == Transform::FactorTransform::execute(Int32(right.safeGet()), *date_lut) ? is_monotonic : is_not_monotonic; } else if (checkAndGetDataType(type_ptr)) { - return Transform::FactorTransform::execute(UInt32(left.get()), *date_lut) - == Transform::FactorTransform::execute(UInt32(right.get()), *date_lut) + return Transform::FactorTransform::execute(UInt32(left.safeGet()), *date_lut) + == Transform::FactorTransform::execute(UInt32(right.safeGet()), *date_lut) ? is_monotonic : is_not_monotonic; } else { assert(checkAndGetDataType(type_ptr)); - const auto & left_date_time = left.get(); + const auto & left_date_time = left.safeGet(); TransformDateTime64 transformer_left(left_date_time.getScale()); - const auto & right_date_time = right.get(); + const auto & right_date_time = right.safeGet(); TransformDateTime64 transformer_right(right_date_time.getScale()); return transformer_left.execute(left_date_time.getValue(), *date_lut) diff --git a/src/Functions/JSONPath/Parsers/ParserJSONPathRange.cpp b/src/Functions/JSONPath/Parsers/ParserJSONPathRange.cpp index fb74018b330..84ac0ff08f3 100644 --- a/src/Functions/JSONPath/Parsers/ParserJSONPathRange.cpp +++ b/src/Functions/JSONPath/Parsers/ParserJSONPathRange.cpp @@ -46,7 +46,7 @@ bool ParserJSONPathRange::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte { return false; } - range_indices.first = static_cast(number_ptr->as()->value.get()); + range_indices.first = static_cast(number_ptr->as()->value.safeGet()); if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingSquareBracket) { @@ -63,7 +63,7 @@ bool ParserJSONPathRange::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte { return false; } - range_indices.second = static_cast(number_ptr->as()->value.get()); + range_indices.second = static_cast(number_ptr->as()->value.safeGet()); } else { diff --git a/src/Functions/JSONPaths.cpp b/src/Functions/JSONPaths.cpp new file mode 100644 index 00000000000..dfb0386e370 --- /dev/null +++ b/src/Functions/JSONPaths.cpp @@ -0,0 +1,518 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int BAD_ARGUMENTS; +} + +namespace +{ + +enum class PathsMode +{ + ALL_PATHS, + DYNAMIC_PATHS, + SHARED_DATA_PATHS, +}; + +struct JSONAllPathsImpl +{ + static constexpr auto name = "JSONAllPaths"; + static constexpr auto paths_mode = PathsMode::ALL_PATHS; + static constexpr auto with_types = false; +}; + +struct JSONAllPathsWithTypesImpl +{ + static constexpr auto name = "JSONAllPathsWithTypes"; + static constexpr auto paths_mode = PathsMode::ALL_PATHS; + static constexpr auto with_types = true; +}; + +struct JSONDynamicPathsImpl +{ + static constexpr auto name = "JSONDynamicPaths"; + static constexpr auto paths_mode = PathsMode::DYNAMIC_PATHS; + static constexpr auto with_types = false; +}; + +struct JSONDynamicPathsWithTypesImpl +{ + static constexpr auto name = "JSONDynamicPathsWithTypes"; + static constexpr auto paths_mode = PathsMode::DYNAMIC_PATHS; + static constexpr auto with_types = true; +}; + +struct JSONSharedDataPathsImpl +{ + static constexpr auto name = "JSONSharedDataPaths"; + static constexpr auto paths_mode = PathsMode::SHARED_DATA_PATHS; + static constexpr auto with_types = false; +}; + +struct JSONSharedDataPathsWithTypesImpl +{ + static constexpr auto name = "JSONSharedDataPathsWithTypes"; + static constexpr auto paths_mode = PathsMode::SHARED_DATA_PATHS; + static constexpr auto with_types = true; +}; + +/// Implements functions that extracts paths and types from JSON object column. +/// Used for introspection of the content of the JSON object column. +template +class FunctionJSONPaths : public IFunction +{ +public: + static constexpr auto name = Impl::name; + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + std::string getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override { return 1; } + bool useDefaultImplementationForConstants() const override { return true; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + + DataTypePtr getReturnTypeImpl(const DataTypes & data_types) const override + { + if (data_types.size() != 1) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires single argument with type JSON", getName()); + + if (data_types[0]->getTypeId() != TypeIndex::Object) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires argument with type JSON, got: {}", getName(),data_types[0]->getName()); + + if constexpr (Impl::with_types) + return std::make_shared(std::make_shared(), std::make_shared()); + return std::make_shared(std::make_shared()); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override + { + const ColumnWithTypeAndName & elem = arguments[0]; + const auto * column_object = typeid_cast(elem.column.get()); + if (!column_object) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected column type in function {}. Expected Object column, got {}", getName(), elem.column->getName()); + + const auto & type_object = assert_cast(*elem.type); + if constexpr (Impl::with_types) + return executeWithTypes(*column_object, type_object); + return executeWithoutTypes(*column_object); + } + +private: + ColumnPtr executeWithoutTypes(const ColumnObject & column_object) const + { + if constexpr (Impl::paths_mode == PathsMode::SHARED_DATA_PATHS) + { + /// No need to do anything, we already have a column with all sorted paths in shared data. + const auto & shared_data_array = column_object.getSharedDataNestedColumn(); + const auto & shared_data_paths = assert_cast(shared_data_array.getData()).getColumnPtr(0); + return ColumnArray::create(shared_data_paths, shared_data_array.getOffsetsPtr()); + } + + auto res = ColumnArray::create(ColumnString::create()); + auto & offsets = res->getOffsets(); + ColumnString & data = assert_cast(res->getData()); + + if constexpr (Impl::paths_mode == PathsMode::DYNAMIC_PATHS) + { + /// Collect all dynamic paths. + const auto & dynamic_path_columns = column_object.getDynamicPaths(); + std::vector dynamic_paths; + dynamic_paths.reserve(dynamic_path_columns.size()); + for (const auto & [path, _] : dynamic_path_columns) + dynamic_paths.push_back(path); + /// We want the resulting arrays of paths to be sorted for consistency. + std::sort(dynamic_paths.begin(), dynamic_paths.end()); + + size_t size = column_object.size(); + for (size_t i = 0; i != size; ++i) + { + for (const auto path : dynamic_paths) + { + /// Don't include path if it contains NULL, because we consider + /// it to be equivalent to the absence of this path in this row. + if (!dynamic_path_columns.find(path)->second->isNullAt(i)) + data.insertData(path.data(), path.size()); + } + offsets.push_back(data.size()); + } + return res; + } + + /// Collect all paths: typed, dynamic and paths from shared data. + std::vector sorted_dynamic_and_typed_paths; + const auto & typed_path_columns = column_object.getTypedPaths(); + const auto & dynamic_path_columns = column_object.getDynamicPaths(); + sorted_dynamic_and_typed_paths.reserve(typed_path_columns.size() + dynamic_path_columns.size()); + for (const auto & [path, _] : typed_path_columns) + sorted_dynamic_and_typed_paths.push_back(path); + for (const auto & [path, _] : dynamic_path_columns) + sorted_dynamic_and_typed_paths.push_back(path); + + /// We want the resulting arrays of paths to be sorted for consistency. + std::sort(sorted_dynamic_and_typed_paths.begin(), sorted_dynamic_and_typed_paths.end()); + + const auto & shared_data_offsets = column_object.getSharedDataOffsets(); + const auto [shared_data_paths, _] = column_object.getSharedDataPathsAndValues(); + for (size_t i = 0; i != shared_data_offsets.size(); ++i) + { + size_t start = shared_data_offsets[static_cast(i) - 1]; + size_t end = shared_data_offsets[static_cast(i)]; + /// Merge sorted list of paths from shared data and sorted_dynamic_and_typed_paths + size_t sorted_paths_index = 0; + for (size_t j = start; j != end; ++j) + { + auto shared_data_path = shared_data_paths->getDataAt(j).toView(); + while (sorted_paths_index != sorted_dynamic_and_typed_paths.size() && sorted_dynamic_and_typed_paths[sorted_paths_index] < shared_data_path) + { + const auto path = sorted_dynamic_and_typed_paths[sorted_paths_index]; + /// If it's dynamic path include it only if it's not NULL. + if (auto it = dynamic_path_columns.find(path); it == dynamic_path_columns.end() || !it->second->isNullAt(i)) + data.insertData(path.data(), path.size()); + ++sorted_paths_index; + } + + data.insertData(shared_data_path.data(), shared_data_path.size()); + } + + for (; sorted_paths_index != sorted_dynamic_and_typed_paths.size(); ++sorted_paths_index) + { + const auto path = sorted_dynamic_and_typed_paths[sorted_paths_index]; + if (auto it = dynamic_path_columns.find(path); it == dynamic_path_columns.end() || !it->second->isNullAt(i)) + data.insertData(path.data(), path.size()); + } + + offsets.push_back(data.size()); + } + + return res; + } + + ColumnPtr executeWithTypes(const ColumnObject & column_object, const DataTypeObject & type_object) const + { + auto offsets_column = ColumnArray::ColumnOffsets::create(); + auto & offsets = offsets_column->getData(); + auto paths_column = ColumnString::create(); + auto types_column = ColumnString::create(); + + if constexpr (Impl::paths_mode == PathsMode::DYNAMIC_PATHS) + { + const auto & dynamic_path_columns = column_object.getDynamicPaths(); + std::vector sorted_dynamic_paths; + sorted_dynamic_paths.reserve(dynamic_path_columns.size()); + for (const auto & [path, _] : dynamic_path_columns) + sorted_dynamic_paths.push_back(path); + /// We want the resulting arrays of paths and values to be sorted for consistency. + std::sort(sorted_dynamic_paths.begin(), sorted_dynamic_paths.end()); + + /// Iterate over all rows and extract types from dynamic columns. + for (size_t i = 0; i != column_object.size(); ++i) + { + for (const auto path : sorted_dynamic_paths) + { + const auto & column = dynamic_path_columns.find(path)->second; + if (!column->isNullAt(i)) + { + auto type = getDynamicValueType(column, i); + paths_column->insertData(path.data(), path.size()); + types_column->insertData(type.data(), type.size()); + } + } + + offsets.push_back(paths_column->size()); + } + + return ColumnMap::create(ColumnPtr(std::move(paths_column)), ColumnPtr(std::move(types_column)), ColumnPtr(std::move(offsets_column))); + } + + if constexpr (Impl::paths_mode == PathsMode::SHARED_DATA_PATHS) + { + const auto & shared_data_offsets = column_object.getSharedDataOffsets(); + const auto [shared_data_paths, shared_data_values] = column_object.getSharedDataPathsAndValues(); + /// Iterate over all rows and extract types from dynamic values in shared data. + for (size_t i = 0; i != shared_data_offsets.size(); ++i) + { + size_t start = shared_data_offsets[static_cast(i) - 1]; + size_t end = shared_data_offsets[static_cast(i)]; + for (size_t j = start; j != end; ++j) + { + if (auto type_name = getDynamicValueTypeFromSharedData(shared_data_values->getDataAt(j))) + { + paths_column->insertFrom(*shared_data_paths, j); + types_column->insertData(type_name->data(), type_name->size()); + } + } + + offsets.push_back(paths_column->size()); + } + + return ColumnMap::create(ColumnPtr(std::move(paths_column)), ColumnPtr(std::move(types_column)), ColumnPtr(std::move(offsets_column))); + } + + /// Iterate over all rows and extract types from dynamic columns from dynamic paths and from values in shared data. + std::vector> sorted_typed_and_dynamic_paths_with_types; + const auto & typed_path_types = type_object.getTypedPaths(); + const auto & dynamic_path_columns = column_object.getDynamicPaths(); + sorted_typed_and_dynamic_paths_with_types.reserve(typed_path_types.size() + dynamic_path_columns.size()); + for (const auto & [path, type] : typed_path_types) + sorted_typed_and_dynamic_paths_with_types.emplace_back(path, type->getName()); + for (const auto & [path, _] : dynamic_path_columns) + sorted_typed_and_dynamic_paths_with_types.emplace_back(path, ""); + + /// We want the resulting arrays of paths and values to be sorted for consistency. + std::sort(sorted_typed_and_dynamic_paths_with_types.begin(), sorted_typed_and_dynamic_paths_with_types.end()); + + const auto & shared_data_offsets = column_object.getSharedDataOffsets(); + const auto [shared_data_paths, shared_data_values] = column_object.getSharedDataPathsAndValues(); + for (size_t i = 0; i != shared_data_offsets.size(); ++i) + { + size_t start = shared_data_offsets[static_cast(i) - 1]; + size_t end = shared_data_offsets[static_cast(i)]; + /// Merge sorted list of paths and values from shared data and sorted_typed_and_dynamic_paths_with_types + size_t sorted_paths_index = 0; + for (size_t j = start; j != end; ++j) + { + auto shared_data_path = shared_data_paths->getDataAt(j).toView(); + auto type_name = getDynamicValueTypeFromSharedData(shared_data_values->getDataAt(j)); + /// Skip NULL values. + if (!type_name) + continue; + + while (sorted_paths_index != sorted_typed_and_dynamic_paths_with_types.size() && sorted_typed_and_dynamic_paths_with_types[sorted_paths_index].first < shared_data_path) + { + auto & [path, type] = sorted_typed_and_dynamic_paths_with_types[sorted_paths_index]; + /// Update type for path from dynamic paths. + if (auto it = dynamic_path_columns.find(path); it != dynamic_path_columns.end()) + { + /// Skip NULL values. + if (it->second->isNullAt(i)) + { + ++sorted_paths_index; + continue; + } + type = getDynamicValueType(it->second, i); + } + paths_column->insertData(path.data(), path.size()); + types_column->insertData(type.data(), type.size()); + ++sorted_paths_index; + } + + paths_column->insertData(shared_data_path.data(), shared_data_path.size()); + types_column->insertData(type_name->data(), type_name->size()); + } + + for (; sorted_paths_index != sorted_typed_and_dynamic_paths_with_types.size(); ++sorted_paths_index) + { + auto & [path, type] = sorted_typed_and_dynamic_paths_with_types[sorted_paths_index]; + if (auto it = dynamic_path_columns.find(path); it != dynamic_path_columns.end()) + { + /// Skip NULL values. + if (it->second->isNullAt(i)) + continue; + type = getDynamicValueType(it->second, i); + } + paths_column->insertData(path.data(), path.size()); + types_column->insertData(type.data(), type.size()); + } + + offsets.push_back(paths_column->size()); + } + + return ColumnMap::create(ColumnPtr(std::move(paths_column)), ColumnPtr(std::move(types_column)), ColumnPtr(std::move(offsets_column))); + } + + String getDynamicValueType(const ColumnPtr & column, size_t i) const + { + const ColumnDynamic * dynamic_column = checkAndGetColumn(column.get()); + const auto & variant_info = dynamic_column->getVariantInfo(); + const auto & variant_column = dynamic_column->getVariantColumn(); + auto global_discr = variant_column.globalDiscriminatorAt(i); + /// We don't output path with NULL values. It should be checked before calling getDynamicValueType. + chassert(global_discr != ColumnVariant::NULL_DISCRIMINATOR); + if (global_discr == dynamic_column->getSharedVariantDiscriminator()) + { + auto value = dynamic_column->getSharedVariant().getDataAt(variant_column.offsetAt(i)); + ReadBufferFromMemory buf(value.data, value.size); + auto type = decodeDataType(buf); + return type->getName(); + } + + return variant_info.variant_names[global_discr]; + } + + std::optional getDynamicValueTypeFromSharedData(StringRef value) const + { + ReadBufferFromMemory buf(value.data, value.size); + auto type = decodeDataType(buf); + if (isNothing(type)) + return std::nullopt; + return type->getName(); + } +}; + +} + +REGISTER_FUNCTION(JSONPaths) +{ + factory.registerFunction>(FunctionDocumentation{ + .description = R"( +Returns the list of all paths stored in each row in JSON column. +)", + .syntax = {"JSONAllPaths(json)"}, + .arguments = {{"json", "JSON column"}}, + .examples = {{{ + "Example", + R"( +CREATE TABLE test (json JSON(max_dynamic_paths=1)) ENGINE = Memory; +INSERT INTO test FORMAT JSONEachRow {"json" : {"a" : 42}}, {"json" : {"b" : "Hello"}}, {"json" : {"a" : [1, 2, 3], "c" : "2020-01-01"}} +SELECT json, JSONAllPaths(json) FROM test; +)", + R"( +┌─json─────────────────────────────────┬─JSONAllPaths(json)─┐ +│ {"a":"42"} │ ['a'] │ +│ {"b":"Hello"} │ ['b'] │ +│ {"a":["1","2","3"],"c":"2020-01-01"} │ ['a','c'] │ +└──────────────────────────────────────┴────────────────────┘ +)"}}}, + .categories{"JSON"}, + }); + + factory.registerFunction>(FunctionDocumentation{ + .description = R"( +Returns the list of all paths and their data types stored in each row in JSON column. +)", + .syntax = {"JSONAllPathsWithTypes(json)"}, + .arguments = {{"json", "JSON column"}}, + .examples = {{{ + "Example", + R"( +CREATE TABLE test (json JSON(max_dynamic_paths=1)) ENGINE = Memory; +INSERT INTO test FORMAT JSONEachRow {"json" : {"a" : 42}}, {"json" : {"b" : "Hello"}}, {"json" : {"a" : [1, 2, 3], "c" : "2020-01-01"}} +SELECT json, JSONAllPathsWithTypes(json) FROM test; +)", + R"( +┌─json─────────────────────────────────┬─JSONAllPathsWithTypes(json)───────────────┐ +│ {"a":"42"} │ {'a':'Int64'} │ +│ {"b":"Hello"} │ {'b':'String'} │ +│ {"a":["1","2","3"],"c":"2020-01-01"} │ {'a':'Array(Nullable(Int64))','c':'Date'} │ +└──────────────────────────────────────┴───────────────────────────────────────────┘ +)"}}}, + .categories{"JSON"}, + }); + + factory.registerFunction>(FunctionDocumentation{ + .description = R"( +Returns the list of dynamic paths that are stored as separate subcolumns in JSON column. +)", + .syntax = {"JSONDynamicPaths(json)"}, + .arguments = {{"json", "JSON column"}}, + .examples = {{{ + "Example", + R"( +CREATE TABLE test (json JSON(max_dynamic_paths=1)) ENGINE = Memory; +INSERT INTO test FORMAT JSONEachRow {"json" : {"a" : 42}}, {"json" : {"b" : "Hello"}}, {"json" : {"a" : [1, 2, 3], "c" : "2020-01-01"}} +SELECT json, JSONDynamicPaths(json) FROM test; +)", + R"( +┌─json─────────────────────────────────┬─JSONDynamicPaths(json)─┐ +│ {"a":"42"} │ ['a'] │ +│ {"b":"Hello"} │ [] │ +│ {"a":["1","2","3"],"c":"2020-01-01"} │ ['a'] │ +└──────────────────────────────────────┴────────────────────────┘ +)"}}}, + .categories{"JSON"}, + }); + + factory.registerFunction>(FunctionDocumentation{ + .description = R"( +Returns the list of dynamic paths that are stored as separate subcolumns and their types in each row in JSON column. +)", + .syntax = {"JSONDynamicPathsWithTypes(json)"}, + .arguments = {{"json", "JSON column"}}, + .examples = {{{ + "Example", + R"( +CREATE TABLE test (json JSON(max_dynamic_paths=1)) ENGINE = Memory; +INSERT INTO test FORMAT JSONEachRow {"json" : {"a" : 42}}, {"json" : {"b" : "Hello"}}, {"json" : {"a" : [1, 2, 3], "c" : "2020-01-01"}} +SELECT json, JSONDynamicPathsWithTypes(json) FROM test; +)", + R"( +┌─json─────────────────────────────────┬─JSONDynamicPathsWithTypes(json)─┐ +│ {"a":"42"} │ {'a':'Int64'} │ +│ {"b":"Hello"} │ {} │ +│ {"a":["1","2","3"],"c":"2020-01-01"} │ {'a':'Array(Nullable(Int64))'} │ +└──────────────────────────────────────┴─────────────────────────────────┘ +)"}}}, + .categories{"JSON"}, + }); + + factory.registerFunction>(FunctionDocumentation{ + .description = R"( +Returns the list of paths that are stored in shared data structure in JSON column. +)", + .syntax = {"JSONDynamicPaths(json)"}, + .arguments = {{"json", "JSON column"}}, + .examples = {{{ + "Example", + R"( +CREATE TABLE test (json JSON(max_dynamic_paths=1)) ENGINE = Memory; +INSERT INTO test FORMAT JSONEachRow {"json" : {"a" : 42}}, {"json" : {"b" : "Hello"}}, {"json" : {"a" : [1, 2, 3], "c" : "2020-01-01"}} +SELECT json, JSONSharedDataPaths(json) FROM test; +)", + R"( +┌─json─────────────────────────────────┬─JSONSharedDataPaths(json)─┐ +│ {"a":"42"} │ [] │ +│ {"b":"Hello"} │ ['b'] │ +│ {"a":["1","2","3"],"c":"2020-01-01"} │ ['c'] │ +└──────────────────────────────────────┴───────────────────────────┘ +)"}}}, + .categories{"JSON"}, + }); + + factory.registerFunction>(FunctionDocumentation{ + .description = R"( +Returns the list of paths that are stored in shared data structure and their types in each row in JSON column. +)", + .syntax = {"JSONDynamicPathsWithTypes(json)"}, + .arguments = {{"json", "JSON column"}}, + .examples = {{{ + "Example", + R"( +CREATE TABLE test (json JSON(max_dynamic_paths=1)) ENGINE = Memory; +INSERT INTO test FORMAT JSONEachRow {"json" : {"a" : 42}}, {"json" : {"b" : "Hello"}}, {"json" : {"a" : [1, 2, 3], "c" : "2020-01-01"}} +SELECT json, JSONDynamicPathsWithTypes(json) FROM test; +)", + R"( +┌─json─────────────────────────────────┬─JSONDynamicPathsWithTypes(json)─┐ +│ {"a":"42"} │ {'a':'Int64'} │ +│ {"b":"Hello"} │ {} │ +│ {"a":["1","2","3"],"c":"2020-01-01"} │ {'a':'Array(Nullable(Int64))'} │ +└──────────────────────────────────────┴─────────────────────────────────┘ +)"}}}, + .categories{"JSON"}, + }); +} + +} diff --git a/src/Functions/LowerUpperImpl.h b/src/Functions/LowerUpperImpl.h index d463ef96e16..a52703d10c8 100644 --- a/src/Functions/LowerUpperImpl.h +++ b/src/Functions/LowerUpperImpl.h @@ -1,7 +1,6 @@ #pragma once #include - namespace DB { diff --git a/src/Functions/LowerUpperUTF8Impl.h b/src/Functions/LowerUpperUTF8Impl.h index eedabca5b22..36ee1723269 100644 --- a/src/Functions/LowerUpperUTF8Impl.h +++ b/src/Functions/LowerUpperUTF8Impl.h @@ -1,15 +1,13 @@ #pragma once + +#include "config.h" + +#if USE_ICU + #include #include -#include -#include +#include #include -#include - -#ifdef __SSE2__ -#include -#endif - namespace DB { @@ -19,71 +17,7 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -/// xor or do nothing -template -UInt8 xor_or_identity(const UInt8 c, const int mask) -{ - return c ^ mask; -} - -template <> -inline UInt8 xor_or_identity(const UInt8 c, const int) -{ - return c; -} - -/// It is caller's responsibility to ensure the presence of a valid cyrillic sequence in array -template -inline void UTF8CyrillicToCase(const UInt8 *& src, UInt8 *& dst) -{ - if (src[0] == 0xD0u && (src[1] >= 0x80u && src[1] <= 0x8Fu)) - { - /// ЀЁЂЃЄЅІЇЈЉЊЋЌЍЎЏ - *dst++ = xor_or_identity(*src++, 0x1); - *dst++ = xor_or_identity(*src++, 0x10); - } - else if (src[0] == 0xD1u && (src[1] >= 0x90u && src[1] <= 0x9Fu)) - { - /// ѐёђѓєѕіїјљњћќѝўџ - *dst++ = xor_or_identity(*src++, 0x1); - *dst++ = xor_or_identity(*src++, 0x10); - } - else if (src[0] == 0xD0u && (src[1] >= 0x90u && src[1] <= 0x9Fu)) - { - /// А-П - *dst++ = *src++; - *dst++ = xor_or_identity(*src++, 0x20); - } - else if (src[0] == 0xD0u && (src[1] >= 0xB0u && src[1] <= 0xBFu)) - { - /// а-п - *dst++ = *src++; - *dst++ = xor_or_identity(*src++, 0x20); - } - else if (src[0] == 0xD0u && (src[1] >= 0xA0u && src[1] <= 0xAFu)) - { - /// Р-Я - *dst++ = xor_or_identity(*src++, 0x1); - *dst++ = xor_or_identity(*src++, 0x20); - } - else if (src[0] == 0xD1u && (src[1] >= 0x80u && src[1] <= 0x8Fu)) - { - /// р-я - *dst++ = xor_or_identity(*src++, 0x1); - *dst++ = xor_or_identity(*src++, 0x20); - } -} - - -/** If the string contains UTF-8 encoded text, convert it to the lower (upper) case. - * Note: It is assumed that after the character is converted to another case, - * the length of its multibyte sequence in UTF-8 does not change. - * Otherwise, the behavior is undefined. - */ -template +template struct LowerUpperUTF8Impl { static void vector( @@ -103,180 +37,48 @@ struct LowerUpperUTF8Impl return; } - res_data.resize_exact(data.size()); - res_offsets.assign(offsets); - array(data.data(), data.data() + data.size(), offsets, res_data.data()); + res_data.resize(data.size()); + res_offsets.resize_exact(offsets.size()); + + String output; + size_t curr_offset = 0; + for (size_t i = 0; i < input_rows_count; ++i) + { + const auto * data_start = reinterpret_cast(&data[offsets[i - 1]]); + size_t size = offsets[i] - offsets[i - 1]; + + icu::UnicodeString input(data_start, static_cast(size), "UTF-8"); + if constexpr (upper) + input.toUpper(); + else + input.toLower(); + + output.clear(); + input.toUTF8String(output); + + /// For valid UTF-8 input strings, ICU sometimes produces output with an extra '\0 at the end. Only the data before that + /// '\0' is valid. If the input is not valid UTF-8, then the behavior of lower/upperUTF8 is undefined by definition. In this + /// case, the behavior is also reasonable. + size_t valid_size = output.size(); + if (!output.empty() && output.back() == '\0') + --valid_size; + + res_data.resize(curr_offset + valid_size + 1); + + memcpy(&res_data[curr_offset], output.data(), valid_size); + res_data[curr_offset + valid_size] = 0; + + curr_offset += valid_size + 1; + res_offsets[i] = curr_offset; + } } static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Functions lowerUTF8 and upperUTF8 cannot work with FixedString argument"); } - - /** Converts a single code point starting at `src` to desired case, storing result starting at `dst`. - * `src` and `dst` are incremented by corresponding sequence lengths. */ - static bool toCase(const UInt8 *& src, const UInt8 * src_end, UInt8 *& dst, bool partial) - { - if (src[0] <= ascii_upper_bound) - { - if (*src >= not_case_lower_bound && *src <= not_case_upper_bound) - *dst++ = *src++ ^ flip_case_mask; - else - *dst++ = *src++; - } - else if (src + 1 < src_end - && ((src[0] == 0xD0u && (src[1] >= 0x80u && src[1] <= 0xBFu)) || (src[0] == 0xD1u && (src[1] >= 0x80u && src[1] <= 0x9Fu)))) - { - cyrillic_to_case(src, dst); - } - else if (src + 1 < src_end && src[0] == 0xC2u) - { - /// Punctuation U+0080 - U+00BF, UTF-8: C2 80 - C2 BF - *dst++ = *src++; - *dst++ = *src++; - } - else if (src + 2 < src_end && src[0] == 0xE2u) - { - /// Characters U+2000 - U+2FFF, UTF-8: E2 80 80 - E2 BF BF - *dst++ = *src++; - *dst++ = *src++; - *dst++ = *src++; - } - else - { - size_t src_sequence_length = UTF8::seqLength(*src); - /// In case partial buffer was passed (due to SSE optimization) - /// we cannot convert it with current src_end, but we may have more - /// bytes to convert and eventually got correct symbol. - if (partial && src_sequence_length > static_cast(src_end - src)) - return false; - - auto src_code_point = UTF8::convertUTF8ToCodePoint(src, src_end - src); - if (src_code_point) - { - int dst_code_point = to_case(*src_code_point); - if (dst_code_point > 0) - { - size_t dst_sequence_length = UTF8::convertCodePointToUTF8(dst_code_point, dst, src_end - src); - assert(dst_sequence_length <= 4); - - /// We don't support cases when lowercase and uppercase characters occupy different number of bytes in UTF-8. - /// As an example, this happens for ß and ẞ. - if (dst_sequence_length == src_sequence_length) - { - src += dst_sequence_length; - dst += dst_sequence_length; - return true; - } - } - } - - *dst = *src; - ++dst; - ++src; - } - - return true; - } - -private: - static constexpr auto ascii_upper_bound = '\x7f'; - static constexpr auto flip_case_mask = 'A' ^ 'a'; - - static void array(const UInt8 * src, const UInt8 * src_end, const ColumnString::Offsets & offsets, UInt8 * dst) - { - const auto * offset_it = offsets.begin(); - const UInt8 * begin = src; - -#ifdef __SSE2__ - static constexpr auto bytes_sse = sizeof(__m128i); - - /// If we are before this position, we can still read at least bytes_sse. - const auto * src_end_sse = src_end - bytes_sse + 1; - - /// SSE2 packed comparison operate on signed types, hence compare (c < 0) instead of (c > 0x7f) - const auto v_zero = _mm_setzero_si128(); - const auto v_not_case_lower_bound = _mm_set1_epi8(not_case_lower_bound - 1); - const auto v_not_case_upper_bound = _mm_set1_epi8(not_case_upper_bound + 1); - const auto v_flip_case_mask = _mm_set1_epi8(flip_case_mask); - - while (src < src_end_sse) - { - const auto chars = _mm_loadu_si128(reinterpret_cast(src)); - - /// check for ASCII - const auto is_not_ascii = _mm_cmplt_epi8(chars, v_zero); - const auto mask_is_not_ascii = _mm_movemask_epi8(is_not_ascii); - - /// ASCII - if (mask_is_not_ascii == 0) - { - const auto is_not_case - = _mm_and_si128(_mm_cmpgt_epi8(chars, v_not_case_lower_bound), _mm_cmplt_epi8(chars, v_not_case_upper_bound)); - const auto mask_is_not_case = _mm_movemask_epi8(is_not_case); - - /// everything in correct case ASCII - if (mask_is_not_case == 0) - _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), chars); - else - { - /// ASCII in mixed case - /// keep `flip_case_mask` only where necessary, zero out elsewhere - const auto xor_mask = _mm_and_si128(v_flip_case_mask, is_not_case); - - /// flip case by applying calculated mask - const auto cased_chars = _mm_xor_si128(chars, xor_mask); - - /// store result back to destination - _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), cased_chars); - } - - src += bytes_sse; - dst += bytes_sse; - } - else - { - /// UTF-8 - - /// Find the offset of the next string after src - size_t offset_from_begin = src - begin; - while (offset_from_begin >= *offset_it) - ++offset_it; - - /// Do not allow one row influence another (since row may have invalid sequence, and break the next) - const UInt8 * row_end = begin + *offset_it; - chassert(row_end >= src); - const UInt8 * expected_end = std::min(src + bytes_sse, row_end); - - while (src < expected_end) - { - if (!toCase(src, expected_end, dst, /* partial= */ true)) - { - /// Fallback to handling byte by byte. - src_end_sse = src; - break; - } - } - } - } - - /// Find the offset of the next string after src - size_t offset_from_begin = src - begin; - while (offset_it != offsets.end() && offset_from_begin >= *offset_it) - ++offset_it; -#endif - - /// handle remaining symbols, row by row (to avoid influence of bad UTF8 symbols from one row, to another) - while (src < src_end) - { - const UInt8 * row_end = begin + *offset_it; - chassert(row_end >= src); - - while (src < row_end) - toCase(src, row_end, dst, /* partial= */ false); - ++offset_it; - } - } }; } + +#endif diff --git a/src/Functions/MultiMatchAllIndicesImpl.h b/src/Functions/MultiMatchAllIndicesImpl.h index 3aeac808880..e7c3aebf794 100644 --- a/src/Functions/MultiMatchAllIndicesImpl.h +++ b/src/Functions/MultiMatchAllIndicesImpl.h @@ -77,7 +77,7 @@ struct MultiMatchAllIndicesImpl std::vector needles; needles.reserve(needles_arr.size()); for (const auto & needle : needles_arr) - needles.emplace_back(needle.get()); + needles.emplace_back(needle.safeGet()); checkHyperscanRegexp(needles, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length); diff --git a/src/Functions/MultiMatchAnyImpl.h b/src/Functions/MultiMatchAnyImpl.h index fda752cbacc..54413cbc1cd 100644 --- a/src/Functions/MultiMatchAnyImpl.h +++ b/src/Functions/MultiMatchAnyImpl.h @@ -91,7 +91,7 @@ struct MultiMatchAnyImpl std::vector needles; needles.reserve(needles_arr.size()); for (const auto & needle : needles_arr) - needles.emplace_back(needle.get()); + needles.emplace_back(needle.safeGet()); checkHyperscanRegexp(needles, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length); diff --git a/src/Functions/MultiSearchAllPositionsImpl.h b/src/Functions/MultiSearchAllPositionsImpl.h index cfe60e51bcd..6c2cd215638 100644 --- a/src/Functions/MultiSearchAllPositionsImpl.h +++ b/src/Functions/MultiSearchAllPositionsImpl.h @@ -33,7 +33,7 @@ struct MultiSearchAllPositionsImpl std::vector needles; needles.reserve(needles_arr.size()); for (const auto & needle : needles_arr) - needles.emplace_back(needle.get()); + needles.emplace_back(needle.safeGet()); auto res_callback = [](const UInt8 * start, const UInt8 * end) -> UInt64 { diff --git a/src/Functions/MultiSearchFirstIndexImpl.h b/src/Functions/MultiSearchFirstIndexImpl.h index 5b34dbfe601..f1dc9ab9e11 100644 --- a/src/Functions/MultiSearchFirstIndexImpl.h +++ b/src/Functions/MultiSearchFirstIndexImpl.h @@ -45,7 +45,7 @@ struct MultiSearchFirstIndexImpl std::vector needles; needles.reserve(needles_arr.size()); for (const auto & needle : needles_arr) - needles.emplace_back(needle.get()); + needles.emplace_back(needle.safeGet()); auto searcher = Impl::createMultiSearcherInBigHaystack(needles); diff --git a/src/Functions/MultiSearchFirstPositionImpl.h b/src/Functions/MultiSearchFirstPositionImpl.h index 06bf7aa94d8..4380eeb1b29 100644 --- a/src/Functions/MultiSearchFirstPositionImpl.h +++ b/src/Functions/MultiSearchFirstPositionImpl.h @@ -45,7 +45,7 @@ struct MultiSearchFirstPositionImpl std::vector needles; needles.reserve(needles_arr.size()); for (const auto & needle : needles_arr) - needles.emplace_back(needle.get()); + needles.emplace_back(needle.safeGet()); auto res_callback = [](const UInt8 * start, const UInt8 * end) -> UInt64 { diff --git a/src/Functions/MultiSearchImpl.h b/src/Functions/MultiSearchImpl.h index 909425f5a93..5c652ddcb74 100644 --- a/src/Functions/MultiSearchImpl.h +++ b/src/Functions/MultiSearchImpl.h @@ -45,7 +45,7 @@ struct MultiSearchImpl std::vector needles; needles.reserve(needles_arr.size()); for (const auto & needle : needles_arr) - needles.emplace_back(needle.get()); + needles.emplace_back(needle.safeGet()); auto searcher = Impl::createMultiSearcherInBigHaystack(needles); diff --git a/src/Functions/URL/cutURLParameter.cpp b/src/Functions/URL/cutURLParameter.cpp index 3ab9cad1ea7..4439e79e962 100644 --- a/src/Functions/URL/cutURLParameter.cpp +++ b/src/Functions/URL/cutURLParameter.cpp @@ -156,7 +156,7 @@ public: for (size_t j = 0; j < num_needles; ++j) { auto field = col_needle_const_array->getData()[j]; - cutURL(res_data, field.get(), res_offset, cur_res_offset); + cutURL(res_data, field.safeGet(), res_offset, cur_res_offset); } } else diff --git a/src/Functions/array/arrayElement.cpp b/src/Functions/array/arrayElement.cpp index 227b29d5d9f..d0b2b49cc1c 100644 --- a/src/Functions/array/arrayElement.cpp +++ b/src/Functions/array/arrayElement.cpp @@ -904,10 +904,10 @@ ColumnPtr FunctionArrayElement::executeNumberConst( return nullptr; if (index.getType() == Field::Types::UInt64 - || (index.getType() == Field::Types::Int64 && index.get() >= 0)) + || (index.getType() == Field::Types::Int64 && index.safeGet() >= 0)) { ArrayElementNumImpl::template vectorConst( - col_nested->getData(), col_array->getOffsets(), index.get() - 1, col_res_vec->getData(), builder); + col_nested->getData(), col_array->getOffsets(), index.safeGet() - 1, col_res_vec->getData(), builder); } else if (index.getType() == Field::Types::Int64) { @@ -972,14 +972,14 @@ FunctionArrayElement::executeStringConst(const ColumnsWithTypeAndName & argument auto col_res = ColumnString::create(); if (index.getType() == Field::Types::UInt64 - || (index.getType() == Field::Types::Int64 && index.get() >= 0)) + || (index.getType() == Field::Types::Int64 && index.safeGet() >= 0)) { if (builder) ArrayElementStringImpl::vectorConst( col_nested->getChars(), col_array->getOffsets(), col_nested->getOffsets(), - index.get() - 1, + index.safeGet() - 1, col_res->getChars(), col_res->getOffsets(), builder); @@ -988,7 +988,7 @@ FunctionArrayElement::executeStringConst(const ColumnsWithTypeAndName & argument col_nested->getChars(), col_array->getOffsets(), col_nested->getOffsets(), - index.get() - 1, + index.safeGet() - 1, col_res->getChars(), col_res->getOffsets(), builder); @@ -1000,7 +1000,7 @@ FunctionArrayElement::executeStringConst(const ColumnsWithTypeAndName & argument col_nested->getChars(), col_array->getOffsets(), col_nested->getOffsets(), - -(UInt64(index.get()) + 1), + -(UInt64(index.safeGet()) + 1), col_res->getChars(), col_res->getOffsets(), builder); @@ -1009,7 +1009,7 @@ FunctionArrayElement::executeStringConst(const ColumnsWithTypeAndName & argument col_nested->getChars(), col_array->getOffsets(), col_nested->getOffsets(), - -(UInt64(index.get()) + 1), + -(UInt64(index.safeGet()) + 1), col_res->getChars(), col_res->getOffsets(), builder); @@ -1046,7 +1046,7 @@ ColumnPtr FunctionArrayElement::executeArrayStringConst( auto res_offsets = ColumnArray::ColumnOffsets::create(); auto res_string_null_map = col_nullable ? ColumnUInt8::create() : nullptr; if (index.getType() == Field::Types::UInt64 - || (index.getType() == Field::Types::Int64 && index.get() >= 0)) + || (index.getType() == Field::Types::Int64 && index.safeGet() >= 0)) { if (col_nullable) ArrayElementArrayStringImpl::vectorConst( @@ -1055,7 +1055,7 @@ ColumnPtr FunctionArrayElement::executeArrayStringConst( col_nested_array->getOffsets(), col_nested_elem->getOffsets(), &string_null_map->getData(), - index.get() - 1, + index.safeGet() - 1, res_string->getChars(), res_offsets->getData(), res_string->getOffsets(), @@ -1068,7 +1068,7 @@ ColumnPtr FunctionArrayElement::executeArrayStringConst( col_nested_array->getOffsets(), col_nested_elem->getOffsets(), nullptr, - index.get() - 1, + index.safeGet() - 1, res_string->getChars(), res_offsets->getData(), res_string->getOffsets(), @@ -1084,7 +1084,7 @@ ColumnPtr FunctionArrayElement::executeArrayStringConst( col_nested_array->getOffsets(), col_nested_elem->getOffsets(), &string_null_map->getData(), - -(UInt64(index.get()) + 1), + -(UInt64(index.safeGet()) + 1), res_string->getChars(), res_offsets->getData(), res_string->getOffsets(), @@ -1097,7 +1097,7 @@ ColumnPtr FunctionArrayElement::executeArrayStringConst( col_nested_array->getOffsets(), col_nested_elem->getOffsets(), nullptr, - -(UInt64(index.get()) + 1), + -(UInt64(index.safeGet()) + 1), res_string->getChars(), res_offsets->getData(), res_string->getOffsets(), @@ -1153,7 +1153,7 @@ ColumnPtr FunctionArrayElement::executeArrayNumberConst( auto & res_offsets = res_array->getOffsets(); NullMap * res_null_map = res_nullable ? &res_nullable->getNullMapData() : nullptr; - if (index.getType() == Field::Types::UInt64 || (index.getType() == Field::Types::Int64 && index.get() >= 0)) + if (index.getType() == Field::Types::UInt64 || (index.getType() == Field::Types::Int64 && index.safeGet() >= 0)) { if (col_nullable) ArrayElementArrayNumImpl::template vectorConst( @@ -1161,7 +1161,7 @@ ColumnPtr FunctionArrayElement::executeArrayNumberConst( col_array->getOffsets(), col_nested_array->getOffsets(), null_map, - index.get() - 1, + index.safeGet() - 1, res_data->getData(), res_offsets, res_null_map, @@ -1172,7 +1172,7 @@ ColumnPtr FunctionArrayElement::executeArrayNumberConst( col_array->getOffsets(), col_nested_array->getOffsets(), null_map, - index.get() - 1, + index.safeGet() - 1, res_data->getData(), res_offsets, res_null_map, @@ -1392,12 +1392,12 @@ ColumnPtr FunctionArrayElement::executeGenericConst( auto col_res = col_nested.cloneEmpty(); if (index.getType() == Field::Types::UInt64 - || (index.getType() == Field::Types::Int64 && index.get() >= 0)) + || (index.getType() == Field::Types::Int64 && index.safeGet() >= 0)) ArrayElementGenericImpl::vectorConst( - col_nested, col_array->getOffsets(), index.get() - 1, *col_res, builder); + col_nested, col_array->getOffsets(), index.safeGet() - 1, *col_res, builder); else if (index.getType() == Field::Types::Int64) ArrayElementGenericImpl::vectorConst( - col_nested, col_array->getOffsets(), -(static_cast(index.get() + 1)), *col_res, builder); + col_nested, col_array->getOffsets(), -(static_cast(index.safeGet() + 1)), *col_res, builder); else throw Exception(ErrorCodes::LOGICAL_ERROR, "Illegal type of array index"); @@ -1598,6 +1598,9 @@ ColumnPtr FunctionArrayElement::executeTuple(const ColumnsWithTypeAndName & argu const auto & tuple_columns = col_nested->getColumns(); size_t tuple_size = tuple_columns.size(); + if (tuple_size == 0) + return ColumnTuple::create(input_rows_count); + const DataTypes & tuple_types = typeid_cast( *typeid_cast(*arguments[0].type).getNestedType()).getElements(); @@ -1789,7 +1792,7 @@ bool FunctionArrayElement::matchKeyToIndexStringConst( using DataColumn = std::decay_t; if (index.getType() != Field::Types::String) return false; - MatcherStringConst matcher{data_column, index.get()}; + MatcherStringConst matcher{data_column, index.safeGet()}; executeMatchKeyToIndex(offsets, matched_idxs, matcher); return true; }); diff --git a/src/Functions/array/mapOp.cpp b/src/Functions/array/mapOp.cpp index 86797cb5db0..614b01c2ac8 100644 --- a/src/Functions/array/mapOp.cpp +++ b/src/Functions/array/mapOp.cpp @@ -237,7 +237,7 @@ private: } arg.val_column->get(offset + j, temp_val); - ValType value = temp_val.get(); + ValType value = temp_val.safeGet(); if constexpr (op_type == OpTypes::ADD) { diff --git a/src/Functions/dynamicType.cpp b/src/Functions/dynamicType.cpp index e8ca73597d6..327cdfe1616 100644 --- a/src/Functions/dynamicType.cpp +++ b/src/Functions/dynamicType.cpp @@ -2,10 +2,14 @@ #include #include #include +#include +#include #include #include #include #include +#include +#include #include @@ -65,11 +69,15 @@ public: const auto & variant_column = dynamic_column->getVariantColumn(); auto res = result_type->createColumn(); String element_type; + auto shared_variant_discr = dynamic_column->getSharedVariantDiscriminator(); + const auto & shared_variant = dynamic_column->getSharedVariant(); for (size_t i = 0; i != input_rows_count; ++i) { auto global_discr = variant_column.globalDiscriminatorAt(i); if (global_discr == ColumnVariant::NULL_DISCRIMINATOR) element_type = name_for_null; + else if (global_discr == shared_variant_discr) + element_type = getTypeNameFromSharedVariantValue(shared_variant.getDataAt(variant_column.offsetAt(i))); else element_type = variant_info.variant_names[global_discr]; @@ -78,6 +86,63 @@ public: return res; } + + String getTypeNameFromSharedVariantValue(StringRef value) const + { + ReadBufferFromMemory buf(value.data, value.size); + return decodeDataType(buf)->getName(); + } +}; + +class FunctionIsDynamicElementInSharedData : public IFunction +{ +public: + static constexpr auto name = "isDynamicElementInSharedData"; + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 1; } + bool useDefaultImplementationForConstants() const override { return true; } + bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + if (arguments.empty() || arguments.size() > 1) + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 1", + getName(), arguments.empty()); + + if (!isDynamic(arguments[0].type.get())) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "First argument for function {} must be Dynamic, got {} instead", + getName(), arguments[0].type->getName()); + + return DataTypeFactory::instance().get("Bool"); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + const ColumnDynamic * dynamic_column = checkAndGetColumn(arguments[0].column.get()); + if (!dynamic_column) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "First argument for function {} must be Dynamic, got {} instead", + getName(), arguments[0].type->getName()); + + const auto & variant_column = dynamic_column->getVariantColumn(); + const auto & local_discriminators = variant_column.getLocalDiscriminators(); + auto res = result_type->createColumn(); + auto & res_data = assert_cast(*res).getData(); + res_data.reserve(dynamic_column->size()); + auto shared_variant_local_discr = variant_column.localDiscriminatorByGlobal(dynamic_column->getSharedVariantDiscriminator()); + for (size_t i = 0; i != input_rows_count; ++i) + res_data.push_back(local_discriminators[i] == shared_variant_local_discr); + + return res; + } }; } @@ -88,7 +153,7 @@ REGISTER_FUNCTION(DynamicType) .description = R"( Returns the variant type name for each row of `Dynamic` column. If row contains NULL, it returns 'None' for it. )", - .syntax = {"dynamicType(variant)"}, + .syntax = {"dynamicType(dynamic)"}, .arguments = {{"dynamic", "Dynamic column"}}, .examples = {{{ "Example", @@ -104,6 +169,30 @@ SELECT d, dynamicType(d) FROM test; │ Hello, World! │ String │ │ [1,2,3] │ Array(Int64) │ └───────────────┴────────────────┘ +)"}}}, + .categories{"Variant"}, + }); + + factory.registerFunction(FunctionDocumentation{ + .description = R"( +Returns true for rows in Dynamic column that are not separated into subcolumns and stored inside shared variant in binary form. +)", + .syntax = {"isDynamicElementInSharedData(dynamic)"}, + .arguments = {{"dynamic", "Dynamic column"}}, + .examples = {{{ + "Example", + R"( +CREATE TABLE test (d Dynamic(max_types=2)) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]); +SELECT d, isDynamicElementInSharedData(d) FROM test; +)", + R"( +┌─d─────────────┬─isDynamicElementInSharedData(d)─┐ +│ ᴺᵁᴸᴸ │ false │ +│ 42 │ false │ +│ Hello, World! │ true │ +│ [1,2,3] │ true │ +└───────────────┴────────────────────┘ )"}}}, .categories{"Variant"}, }); diff --git a/src/Functions/empty.cpp b/src/Functions/empty.cpp index 51811d21a0c..ddb503668cf 100644 --- a/src/Functions/empty.cpp +++ b/src/Functions/empty.cpp @@ -2,10 +2,18 @@ #include #include #include +#include namespace DB { + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + namespace { @@ -13,13 +21,135 @@ struct NameEmpty { static constexpr auto name = "empty"; }; + using FunctionEmpty = FunctionStringOrArrayToT, NameEmpty, UInt8, false>; +/// Implements the empty function for JSON type. +class ExecutableFunctionJSONEmpty : public IExecutableFunction +{ +public: + std::string getName() const override { return NameEmpty::name; } + +private: + bool useDefaultImplementationForConstants() const override { return true; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override + { + const ColumnWithTypeAndName & elem = arguments[0]; + const auto * object_column = typeid_cast(elem.column.get()); + if (!object_column) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected column type in function {}. Expected Object column, got {}", getName(), elem.column->getName()); + + auto res = DataTypeUInt8().createColumn(); + auto & data = typeid_cast(*res).getData(); + const auto & typed_paths = object_column->getTypedPaths(); + size_t size = object_column->size(); + /// If object column has at least 1 typed path, it will never be empty, because these paths always have values. + if (!typed_paths.empty()) + { + data.resize_fill(size, 0); + return res; + } + + const auto & dynamic_paths = object_column->getDynamicPaths(); + const auto & shared_data = object_column->getSharedDataPtr(); + data.reserve(size); + for (size_t i = 0; i != size; ++i) + { + bool empty = true; + /// Check if there is no paths in shared data. + if (!shared_data->isDefaultAt(i)) + { + empty = false; + } + /// Check that all dynamic paths have NULL value in this row. + else + { + for (const auto & [path, column] : dynamic_paths) + { + if (!column->isNullAt(i)) + { + empty = false; + break; + } + } + } + + data.push_back(empty); + } + + return res; + } +}; + +class FunctionEmptyJSON final : public IFunctionBase +{ +public: + FunctionEmptyJSON(const DataTypes & argument_types_, const DataTypePtr & return_type_) : argument_types(argument_types_), return_type(return_type_) {} + + String getName() const override { return NameEmpty::name; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + + const DataTypes & getArgumentTypes() const override { return argument_types; } + const DataTypePtr & getResultType() const override { return return_type; } + + ExecutableFunctionPtr prepare(const ColumnsWithTypeAndName &) const override + { + return std::make_unique(); + } + +private: + DataTypes argument_types; + DataTypePtr return_type; +}; + +class FunctionEmptyOverloadResolver final : public IFunctionOverloadResolver +{ +public: + static constexpr auto name = NameEmpty::name; + + static FunctionOverloadResolverPtr create(ContextPtr) + { + return std::make_unique(); + } + + String getName() const override { return NameEmpty::name; } + size_t getNumberOfArguments() const override { return 1; } + + FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override + { + DataTypes argument_types; + argument_types.reserve(arguments.size()); + for (const auto & arg : arguments) + argument_types.push_back(arg.type); + + if (argument_types.size() == 1 && isObject(argument_types[0])) + return std::make_shared(argument_types, return_type); + + return std::make_shared(std::make_shared(), argument_types, return_type); + } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (!isStringOrFixedString(arguments[0]) + && !isArray(arguments[0]) + && !isMap(arguments[0]) + && !isUUID(arguments[0]) + && !isIPv6(arguments[0]) + && !isIPv4(arguments[0]) + && !isObject(arguments[0])) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", arguments[0]->getName(), getName()); + + return std::make_shared(); + } +}; + } REGISTER_FUNCTION(Empty) { - factory.registerFunction(); + factory.registerFunction(); } } diff --git a/src/Functions/formatQuery.cpp b/src/Functions/formatQuery.cpp index 9591ea95254..be633bdfe37 100644 --- a/src/Functions/formatQuery.cpp +++ b/src/Functions/formatQuery.cpp @@ -43,6 +43,7 @@ public: max_query_size = settings.max_query_size; max_parser_depth = settings.max_parser_depth; max_parser_backtracks = settings.max_parser_backtracks; + print_pretty_type_names = settings.print_pretty_type_names; } String getName() const override { return name; } @@ -138,7 +139,11 @@ private: } } - formatAST(*ast, buf, /*hilite*/ false, /*single_line*/ output_formatting == OutputFormatting::SingleLine); + IAST::FormatSettings settings(buf, output_formatting == OutputFormatting::SingleLine, /*hilite*/ false); + settings.show_secrets = true; + settings.print_pretty_type_names = print_pretty_type_names; + ast->format(settings); + auto formatted = buf.stringView(); const size_t res_data_new_size = res_data_size + formatted.size() + 1; @@ -165,6 +170,7 @@ private: size_t max_query_size; size_t max_parser_depth; size_t max_parser_backtracks; + bool print_pretty_type_names; }; } diff --git a/src/Functions/geometryConverters.h b/src/Functions/geometryConverters.h index 03831d37e0c..f1156d81f01 100644 --- a/src/Functions/geometryConverters.h +++ b/src/Functions/geometryConverters.h @@ -31,6 +31,9 @@ namespace ErrorCodes template using LineString = boost::geometry::model::linestring; +template +using MultiLineString = boost::geometry::model::multi_linestring>; + template using Ring = boost::geometry::model::ring; @@ -42,12 +45,14 @@ using MultiPolygon = boost::geometry::model::multi_polygon>; using CartesianPoint = boost::geometry::model::d2::point_xy; using CartesianLineString = LineString; +using CartesianMultiLineString = MultiLineString; using CartesianRing = Ring; using CartesianPolygon = Polygon; using CartesianMultiPolygon = MultiPolygon; using SphericalPoint = boost::geometry::model::point>; using SphericalLineString = LineString; +using SphericalMultiLineString = MultiLineString; using SphericalRing = Ring; using SphericalPolygon = Polygon; using SphericalMultiPolygon = MultiPolygon; @@ -113,6 +118,28 @@ struct ColumnToLineStringsConverter } }; +/** + * Class which converts Column with type Array(Array(Tuple(Float64, Float64))) to a vector of boost multi_linestring type. +*/ +template +struct ColumnToMultiLineStringsConverter +{ + static std::vector> convert(ColumnPtr col) + { + const IColumn::Offsets & offsets = typeid_cast(*col).getOffsets(); + size_t prev_offset = 0; + std::vector> answer(offsets.size()); + auto all_linestrings = ColumnToLineStringsConverter::convert(typeid_cast(*col).getDataPtr()); + for (size_t iter = 0; iter < offsets.size() && iter < all_linestrings.size(); ++iter) + { + for (size_t linestring_iter = prev_offset; linestring_iter < offsets[iter]; ++linestring_iter) + answer[iter].emplace_back(std::move(all_linestrings[linestring_iter])); + prev_offset = offsets[iter]; + } + return answer; + } +}; + /** * Class which converts Column with type Array(Tuple(Float64, Float64)) to a vector of boost ring type. */ @@ -268,6 +295,38 @@ private: ColumnUInt64::MutablePtr offsets; }; +/// Serialize Point, MultiLineString as MultiLineString +template +class MultiLineStringSerializer +{ +public: + MultiLineStringSerializer() + : offsets(ColumnUInt64::create()) + {} + + explicit MultiLineStringSerializer(size_t n) + : offsets(ColumnUInt64::create(n)) + {} + + void add(const MultiLineString & multilinestring) + { + size += multilinestring.size(); + offsets->insertValue(size); + for (const auto & linestring : multilinestring) + linestring_serializer.add(linestring); + } + + ColumnPtr finalize() + { + return ColumnArray::create(linestring_serializer.finalize(), std::move(offsets)); + } + +private: + size_t size = 0; + LineStringSerializer linestring_serializer; + ColumnUInt64::MutablePtr offsets; +}; + /// Almost the same as LineStringSerializer /// Serialize Point, Ring as Ring template @@ -411,6 +470,11 @@ static void callOnGeometryDataType(DataTypePtr type, F && f) else if (factory.get("LineString")->equals(*type) && type->getCustomName() && type->getCustomName()->getName() == "LineString") return f(ConverterType>()); + /// We should take the name into consideration to avoid ambiguity. + /// Because for example both MultiLineString and Polygon are resolved to Array(Array(Point)). + else if (factory.get("MultiLineString")->equals(*type) && type->getCustomName() && type->getCustomName()->getName() == "MultiLineString") + return f(ConverterType>()); + /// For backward compatibility if we call this function not on a custom type, we will consider Array(Tuple(Point)) as type Ring. else if (factory.get("Ring")->equals(*type)) return f(ConverterType>()); diff --git a/src/Functions/getClientHTTPHeader.cpp b/src/Functions/getClientHTTPHeader.cpp index 140f39d03b8..50a6275fc82 100644 --- a/src/Functions/getClientHTTPHeader.cpp +++ b/src/Functions/getClientHTTPHeader.cpp @@ -58,7 +58,7 @@ public: { Field header; source->get(row, header); - if (auto it = client_info.http_headers.find(header.get()); it != client_info.http_headers.end()) + if (auto it = client_info.http_headers.find(header.safeGet()); it != client_info.http_headers.end()) result->insert(it->second); else result->insertDefault(); diff --git a/src/Functions/initcapUTF8.cpp b/src/Functions/initcapUTF8.cpp index 282d846094e..004586dce26 100644 --- a/src/Functions/initcapUTF8.cpp +++ b/src/Functions/initcapUTF8.cpp @@ -1,9 +1,8 @@ #include #include -#include #include #include - +#include namespace DB { diff --git a/src/Functions/lowerUTF8.cpp b/src/Functions/lowerUTF8.cpp index 7adb0069121..e2f7cb84730 100644 --- a/src/Functions/lowerUTF8.cpp +++ b/src/Functions/lowerUTF8.cpp @@ -1,9 +1,10 @@ -#include +#include "config.h" + +#if USE_ICU + +#include #include #include -#include -#include - namespace DB { @@ -15,13 +16,25 @@ struct NameLowerUTF8 static constexpr auto name = "lowerUTF8"; }; -using FunctionLowerUTF8 = FunctionStringToString>, NameLowerUTF8>; +using FunctionLowerUTF8 = FunctionStringToString, NameLowerUTF8>; } REGISTER_FUNCTION(LowerUTF8) { - factory.registerFunction(); + FunctionDocumentation::Description description + = R"(Converts a string to lowercase, assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.)"; + FunctionDocumentation::Syntax syntax = "lowerUTF8(input)"; + FunctionDocumentation::Arguments arguments = {{"input", "Input with String type"}}; + FunctionDocumentation::ReturnedValue returned_value = "A String data type value"; + FunctionDocumentation::Examples examples = { + {"first", "SELECT lowerUTF8('München') as Lowerutf8;", "münchen"}, + }; + FunctionDocumentation::Categories categories = {"String"}; + + factory.registerFunction({description, syntax, arguments, returned_value, examples, categories}); } } + +#endif diff --git a/src/Functions/multiIf.cpp b/src/Functions/multiIf.cpp index c4b675fcf6c..14b8b70b22c 100644 --- a/src/Functions/multiIf.cpp +++ b/src/Functions/multiIf.cpp @@ -200,7 +200,7 @@ public: if (value.isNull()) continue; - if (value.get() == 0) + if (value.safeGet() == 0) continue; instruction.condition_always_true = true; diff --git a/src/Functions/nested.cpp b/src/Functions/nested.cpp index bdaf57d65c9..85c342b5e7c 100644 --- a/src/Functions/nested.cpp +++ b/src/Functions/nested.cpp @@ -145,7 +145,7 @@ private: if (nested_names_field.getType() != Field::Types::Array) return {}; - const auto & nested_names_array = nested_names_field.get(); + const auto & nested_names_array = nested_names_field.safeGet(); Names nested_names; nested_names.reserve(nested_names_array.size()); @@ -155,7 +155,7 @@ private: if (nested_name_field.getType() != Field::Types::String) return {}; - nested_names.push_back(nested_name_field.get()); + nested_names.push_back(nested_name_field.safeGet()); } return nested_names; diff --git a/src/Functions/overlay.cpp b/src/Functions/overlay.cpp new file mode 100644 index 00000000000..df8b825eabe --- /dev/null +++ b/src/Functions/overlay.cpp @@ -0,0 +1,718 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace +{ + +/// If 'is_utf8' - measure offset and length in code points instead of bytes. +/// Syntax: +/// - overlay(input, replace, offset[, length]) +/// - overlayUTF8(input, replace, offset[, length]) - measure offset and length in code points instead of bytes +template +class FunctionOverlay : public IFunction +{ +public: + static constexpr auto name = is_utf8 ? "overlayUTF8" : "overlay"; + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + String getName() const override { return name; } + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + FunctionArgumentDescriptors mandatory_args{ + {"input", static_cast(&isString), nullptr, "String"}, + {"replace", static_cast(&isString), nullptr, "String"}, + {"offset", static_cast(&isNativeInteger), nullptr, "(U)Int8/16/32/64"}, + }; + + FunctionArgumentDescriptors optional_args{ + {"length", static_cast(&isNativeInteger), nullptr, "(U)Int8/16/32/64"}, + }; + + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + if (input_rows_count == 0) + return ColumnString::create(); + + bool has_four_args = (arguments.size() == 4); + + ColumnPtr col_input = arguments[0].column; + const auto * col_input_const = checkAndGetColumn(col_input.get()); + const auto * col_input_string = checkAndGetColumn(col_input.get()); + bool input_is_const = (col_input_const != nullptr); + + ColumnPtr col_replace = arguments[1].column; + const auto * col_replace_const = checkAndGetColumn(col_replace.get()); + const auto * col_replace_string = checkAndGetColumn(col_replace.get()); + bool replace_is_const = (col_replace_const != nullptr); + + ColumnPtr col_offset = arguments[2].column; + const ColumnConst * col_offset_const = checkAndGetColumn(col_offset.get()); + bool offset_is_const = false; + Int64 offset = -1; + if (col_offset_const) + { + offset = col_offset_const->getInt(0); + offset_is_const = true; + } + + ColumnPtr col_length = has_four_args ? arguments[3].column : nullptr; + const ColumnConst * col_length_const = has_four_args ? checkAndGetColumn(col_length.get()) : nullptr; + bool length_is_const = false; + Int64 length = -1; + if (col_length_const) + { + length = col_length_const->getInt(0); + length_is_const = true; + } + + auto res_col = ColumnString::create(); + auto & res_data = res_col->getChars(); + auto & res_offsets = res_col->getOffsets(); + + res_offsets.resize_exact(input_rows_count); + if (col_input_const) + { + StringRef input = col_input_const->getDataAt(0); + res_data.reserve((input.size + 1) * input_rows_count); + } + else + { + res_data.reserve(col_input_string->getChars().size()); + } + + +#define OVERLAY_EXECUTE_CASE(HAS_FOUR_ARGS, OFFSET_IS_CONST, LENGTH_IS_CONST) \ + if (input_is_const && replace_is_const) \ + constantConstant( \ + input_rows_count, \ + col_input_const->getDataAt(0), \ + col_replace_const->getDataAt(0), \ + col_offset, \ + col_length, \ + offset, \ + length, \ + res_data, \ + res_offsets); \ + else if (input_is_const && !replace_is_const) \ + constantVector( \ + input_rows_count, \ + col_input_const->getDataAt(0), \ + col_replace_string->getChars(), \ + col_replace_string->getOffsets(), \ + col_offset, \ + col_length, \ + offset, \ + length, \ + res_data, \ + res_offsets); \ + else if (!input_is_const && replace_is_const) \ + vectorConstant( \ + input_rows_count, \ + col_input_string->getChars(), \ + col_input_string->getOffsets(), \ + col_replace_const->getDataAt(0), \ + col_offset, \ + col_length, \ + offset, \ + length, \ + res_data, \ + res_offsets); \ + else \ + vectorVector( \ + input_rows_count, \ + col_input_string->getChars(), \ + col_input_string->getOffsets(), \ + col_replace_string->getChars(), \ + col_replace_string->getOffsets(), \ + col_offset, \ + col_length, \ + offset, \ + length, \ + res_data, \ + res_offsets); + + if (!has_four_args) + { + if (offset_is_const) + { + OVERLAY_EXECUTE_CASE(false, true, false) + } + else + { + OVERLAY_EXECUTE_CASE(false, false, false) + } + } + else + { + if (offset_is_const && length_is_const) + { + OVERLAY_EXECUTE_CASE(true, true, true) + } + else if (offset_is_const && !length_is_const) + { + OVERLAY_EXECUTE_CASE(true, true, false) + } + else if (!offset_is_const && length_is_const) + { + OVERLAY_EXECUTE_CASE(true, false, true) + } + else + { + OVERLAY_EXECUTE_CASE(true, false, false) + } + } +#undef OVERLAY_EXECUTE_CASE + + return res_col; + } + + +private: + /// input offset is 1-based, maybe negative + /// output result is 0-based valid offset, within [0, input_size] + static size_t getValidOffset(Int64 offset, size_t input_size) + { + if (offset > 0) + { + if (static_cast(offset) > input_size + 1) + return input_size; + else + return offset - 1; + } + else + { + if (input_size < -static_cast(offset)) + return 0; + else + return input_size + offset; + } + } + + /// get character count of a slice [data, data+bytes) + static size_t getSliceSize(const UInt8 * data, size_t bytes) + { + if constexpr (is_utf8) + return UTF8::countCodePoints(data, bytes); + else + return bytes; + } + + template + void constantConstant( + size_t rows, + const StringRef & input, + const StringRef & replace, + const ColumnPtr & column_offset, + const ColumnPtr & column_length, + Int64 const_offset, + Int64 const_length, + ColumnString::Chars & res_data, + ColumnString::Offsets & res_offsets) const + { + if (has_four_args && length_is_const && const_length < 0) + { + constantConstant( + rows, input, replace, column_offset, column_length, const_offset, -1, res_data, res_offsets); + return; + } + + size_t input_size = getSliceSize(reinterpret_cast(input.data), input.size); + size_t valid_offset = 0; // start from 0, not negative + if constexpr (offset_is_const) + valid_offset = getValidOffset(const_offset, input_size); + + size_t replace_size = getSliceSize(reinterpret_cast(replace.data), replace.size); + size_t valid_length = 0; // not negative + if constexpr (has_four_args && length_is_const) + { + assert(const_length >= 0); + valid_length = const_length; + } + else if constexpr (!has_four_args) + { + valid_length = replace_size; + } + + Int64 offset = 0; // start from 1, maybe negative + Int64 length = 0; // maybe negative + const UInt8 * input_begin = reinterpret_cast(input.data); + const UInt8 * input_end = reinterpret_cast(input.data + input.size); + size_t res_offset = 0; + for (size_t i = 0; i < rows; ++i) + { + if constexpr (!offset_is_const) + { + offset = column_offset->getInt(i); + valid_offset = getValidOffset(offset, input_size); + } + + if constexpr (has_four_args && !length_is_const) + { + length = column_length->getInt(i); + valid_length = length >= 0 ? length : replace_size; + } + + size_t prefix_size = valid_offset; + size_t suffix_size = (prefix_size + valid_length > input_size) ? 0 : (input_size - prefix_size - valid_length); + + if constexpr (!is_utf8) + { + size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator + res_data.resize(new_res_size); + + /// copy prefix before replaced region + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data, prefix_size); + res_offset += prefix_size; + + /// copy replace + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], replace.data, replace_size); + res_offset += replace_size; + + /// copy suffix after replaced region. It is not necessary to copy if suffix_size is zero. + if (suffix_size) + { + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data + prefix_size + valid_length, suffix_size); + res_offset += suffix_size; + } + } + else + { + const auto * prefix_end = GatherUtils::UTF8StringSource::skipCodePointsForward(input_begin, prefix_size, input_end); + size_t prefix_bytes = prefix_end > input_end ? input.size : prefix_end - input_begin; + + const auto * suffix_begin = GatherUtils::UTF8StringSource::skipCodePointsBackward(input_end, suffix_size, input_begin); + size_t suffix_bytes = input_end - suffix_begin; + + size_t new_res_size = res_data.size() + prefix_bytes + replace.size + suffix_bytes + 1; /// +1 for zero terminator + res_data.resize(new_res_size); + + /// copy prefix before replaced region + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input_begin, prefix_bytes); + res_offset += prefix_bytes; + + /// copy replace + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], replace.data, replace.size); + res_offset += replace.size; + + /// copy suffix after replaced region. It is not necessary to copy if suffix_bytes is zero. + if (suffix_bytes) + { + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], suffix_begin, suffix_bytes); + res_offset += suffix_bytes; + } + } + + /// add zero terminator + res_data[res_offset] = 0; + ++res_offset; + + res_offsets[i] = res_offset; + } + } + + template + void vectorConstant( + size_t rows, + const ColumnString::Chars & input_data, + const ColumnString::Offsets & input_offsets, + const StringRef & replace, + const ColumnPtr & column_offset, + const ColumnPtr & column_length, + Int64 const_offset, + Int64 const_length, + ColumnString::Chars & res_data, + ColumnString::Offsets & res_offsets) const + { + if (has_four_args && length_is_const && const_length < 0) + { + vectorConstant( + rows, input_data, input_offsets, replace, column_offset, column_length, const_offset, -1, res_data, res_offsets); + return; + } + + size_t replace_size = getSliceSize(reinterpret_cast(replace.data), replace.size); + Int64 length = 0; // maybe negative + size_t valid_length = 0; // not negative + if constexpr (has_four_args && length_is_const) + { + assert(const_length >= 0); + valid_length = const_length; + } + else if constexpr (!has_four_args) + { + valid_length = replace_size; + } + + Int64 offset = 0; // start from 1, maybe negative + size_t valid_offset = 0; // start from 0, not negative + size_t res_offset = 0; + for (size_t i = 0; i < rows; ++i) + { + size_t input_offset = input_offsets[i - 1]; + size_t input_bytes = input_offsets[i] - input_offsets[i - 1] - 1; + size_t input_size = getSliceSize(&input_data[input_offset], input_bytes); + + if constexpr (offset_is_const) + { + valid_offset = getValidOffset(const_offset, input_size); + } + else + { + offset = column_offset->getInt(i); + valid_offset = getValidOffset(offset, input_size); + } + + if constexpr (has_four_args && !length_is_const) + { + length = column_length->getInt(i); + valid_length = length >= 0 ? length : replace_size; + } + + size_t prefix_size = valid_offset; + size_t suffix_size = (prefix_size + valid_length > input_size) ? 0 : (input_size - prefix_size - valid_length); + + if constexpr (!is_utf8) + { + size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator + res_data.resize(new_res_size); + + /// copy prefix before replaced region + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &input_data[input_offset], prefix_size); + res_offset += prefix_size; + + /// copy replace + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], replace.data, replace_size); + res_offset += replace_size; + + /// copy suffix after replaced region. It is not necessary to copy if suffix_size is zero. + if (suffix_size) + { + memcpySmallAllowReadWriteOverflow15( + &res_data[res_offset], &input_data[input_offset + prefix_size + valid_length], suffix_size); + res_offset += suffix_size; + } + } + else + { + const auto * input_begin = &input_data[input_offset]; + const auto * input_end = &input_data[input_offset + input_bytes]; + const auto * prefix_end = GatherUtils::UTF8StringSource::skipCodePointsForward(input_begin, prefix_size, input_end); + size_t prefix_bytes = prefix_end > input_end ? input_bytes : prefix_end - input_begin; + const auto * suffix_begin = GatherUtils::UTF8StringSource::skipCodePointsBackward(input_end, suffix_size, input_begin); + size_t suffix_bytes = input_end - suffix_begin; + + size_t new_res_size = res_data.size() + prefix_bytes + replace.size + suffix_bytes + 1; /// +1 for zero terminator + res_data.resize(new_res_size); + + /// copy prefix before replaced region + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &input_data[input_offset], prefix_bytes); + res_offset += prefix_bytes; + + /// copy replace + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], replace.data, replace.size); + res_offset += replace.size; + + /// copy suffix after replaced region. It is not necessary to copy if suffix_bytes is zero. + if (suffix_bytes) + { + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], suffix_begin, suffix_bytes); + res_offset += suffix_bytes; + } + } + + /// add zero terminator + res_data[res_offset] = 0; + ++res_offset; + + res_offsets[i] = res_offset; + } + } + + template + void constantVector( + size_t rows, + const StringRef & input, + const ColumnString::Chars & replace_data, + const ColumnString::Offsets & replace_offsets, + const ColumnPtr & column_offset, + const ColumnPtr & column_length, + Int64 const_offset, + Int64 const_length, + ColumnString::Chars & res_data, + ColumnString::Offsets & res_offsets) const + { + if (has_four_args && length_is_const && const_length < 0) + { + constantVector( + rows, input, replace_data, replace_offsets, column_offset, column_length, const_offset, -1, res_data, res_offsets); + return; + } + + size_t input_size = getSliceSize(reinterpret_cast(input.data), input.size); + size_t valid_offset = 0; // start from 0, not negative + if constexpr (offset_is_const) + valid_offset = getValidOffset(const_offset, input_size); + + Int64 length = 0; // maybe negative + size_t valid_length = 0; // not negative + if constexpr (has_four_args && length_is_const) + { + assert(const_length >= 0); + valid_length = const_length; + } + + const auto * input_begin = reinterpret_cast(input.data); + const auto * input_end = reinterpret_cast(input.data + input.size); + Int64 offset = 0; // start from 1, maybe negative + size_t res_offset = 0; + for (size_t i = 0; i < rows; ++i) + { + size_t replace_offset = replace_offsets[i - 1]; + size_t replace_bytes = replace_offsets[i] - replace_offsets[i - 1] - 1; + size_t replace_size = getSliceSize(&replace_data[replace_offset], replace_bytes); + + if constexpr (!offset_is_const) + { + offset = column_offset->getInt(i); + valid_offset = getValidOffset(offset, input_size); + } + + if constexpr (!has_four_args) + { + valid_length = replace_size; + } + else if constexpr (!length_is_const) + { + length = column_length->getInt(i); + valid_length = length >= 0 ? length : replace_size; + } + + size_t prefix_size = valid_offset; + size_t suffix_size = (prefix_size + valid_length > input_size) ? 0 : (input_size - prefix_size - valid_length); + + if constexpr (!is_utf8) + { + size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator + res_data.resize(new_res_size); + + /// copy prefix before replaced region + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data, prefix_size); + res_offset += prefix_size; + + /// copy replace + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &replace_data[replace_offset], replace_size); + res_offset += replace_size; + + /// copy suffix after replaced region. It is not necessary to copy if suffix_size is zero. + if (suffix_size) + { + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data + prefix_size + valid_length, suffix_size); + res_offset += suffix_size; + } + } + else + { + const auto * prefix_end = GatherUtils::UTF8StringSource::skipCodePointsForward(input_begin, prefix_size, input_end); + size_t prefix_bytes = prefix_end > input_end ? input.size : prefix_end - input_begin; + const auto * suffix_begin = GatherUtils::UTF8StringSource::skipCodePointsBackward(input_end, suffix_size, input_begin); + size_t suffix_bytes = input_end - suffix_begin; + size_t new_res_size = res_data.size() + prefix_bytes + replace_bytes + suffix_bytes + 1; /// +1 for zero terminator + res_data.resize(new_res_size); + + /// copy prefix before replaced region + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input_begin, prefix_bytes); + res_offset += prefix_bytes; + + /// copy replace + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &replace_data[replace_offset], replace_bytes); + res_offset += replace_bytes; + + /// copy suffix after replaced region. It is not necessary to copy if suffix_bytes is zero + if (suffix_bytes) + { + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], suffix_begin, suffix_bytes); + res_offset += suffix_bytes; + } + } + + /// add zero terminator + res_data[res_offset] = 0; + ++res_offset; + + res_offsets[i] = res_offset; + } + } + + template + void vectorVector( + size_t rows, + const ColumnString::Chars & input_data, + const ColumnString::Offsets & input_offsets, + const ColumnString::Chars & replace_data, + const ColumnString::Offsets & replace_offsets, + const ColumnPtr & column_offset, + const ColumnPtr & column_length, + Int64 const_offset, + Int64 const_length, + ColumnString::Chars & res_data, + ColumnString::Offsets & res_offsets) const + { + if (has_four_args && length_is_const && const_length < 0) + { + vectorVector( + rows, + input_data, + input_offsets, + replace_data, + replace_offsets, + column_offset, + column_length, + const_offset, + -1, + res_data, + res_offsets); + return; + } + + Int64 length = 0; // maybe negative + size_t valid_length = 0; // not negative + if constexpr (has_four_args && length_is_const) + { + assert(const_length >= 0); + valid_length = const_length; + } + + Int64 offset = 0; // start from 1, maybe negative + size_t valid_offset = 0; // start from 0, not negative + size_t res_offset = 0; + for (size_t i = 0; i < rows; ++i) + { + size_t input_offset = input_offsets[i - 1]; + size_t input_bytes = input_offsets[i] - input_offsets[i - 1] - 1; + size_t input_size = getSliceSize(&input_data[input_offset], input_bytes); + + size_t replace_offset = replace_offsets[i - 1]; + size_t replace_bytes = replace_offsets[i] - replace_offsets[i - 1] - 1; + size_t replace_size = getSliceSize(&replace_data[replace_offset], replace_bytes); + + if constexpr (offset_is_const) + { + valid_offset = getValidOffset(const_offset, input_size); + } + else + { + offset = column_offset->getInt(i); + valid_offset = getValidOffset(offset, input_size); + } + + if constexpr (!has_four_args) + { + valid_length = replace_size; + } + else if constexpr (!length_is_const) + { + length = column_length->getInt(i); + valid_length = length >= 0 ? length : replace_size; + } + + size_t prefix_size = valid_offset; + size_t suffix_size = (prefix_size + valid_length > input_size) ? 0 : (input_size - prefix_size - valid_length); + + if constexpr (!is_utf8) + { + size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator + res_data.resize(new_res_size); + + /// copy prefix before replaced region + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &input_data[input_offset], prefix_size); + res_offset += prefix_size; + + /// copy replace + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &replace_data[replace_offset], replace_size); + res_offset += replace_size; + + /// copy suffix after replaced region. It is not necessary to copy if suffix_size is zero. + if (suffix_size) + { + memcpySmallAllowReadWriteOverflow15( + &res_data[res_offset], &input_data[input_offset + prefix_size + valid_length], suffix_size); + res_offset += suffix_size; + } + } + else + { + const auto * input_begin = &input_data[input_offset]; + const auto * input_end = &input_data[input_offset + input_bytes]; + const auto * prefix_end = GatherUtils::UTF8StringSource::skipCodePointsForward(input_begin, prefix_size, input_end); + size_t prefix_bytes = prefix_end > input_end ? input_bytes : prefix_end - input_begin; + const auto * suffix_begin = GatherUtils::UTF8StringSource::skipCodePointsBackward(input_end, suffix_size, input_begin); + size_t suffix_bytes = input_end - suffix_begin; + size_t new_res_size = res_data.size() + prefix_bytes + replace_bytes + suffix_bytes + 1; /// +1 for zero terminator + res_data.resize(new_res_size); + + /// copy prefix before replaced region + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input_begin, prefix_bytes); + res_offset += prefix_bytes; + + /// copy replace + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &replace_data[replace_offset], replace_bytes); + res_offset += replace_bytes; + + /// copy suffix after replaced region. It is not necessary to copy if suffix_bytes is zero. + if (suffix_bytes) + { + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], suffix_begin, suffix_bytes); + res_offset += suffix_bytes; + } + } + + /// add zero terminator + res_data[res_offset] = 0; + ++res_offset; + + res_offsets[i] = res_offset; + } + } +}; + +} + +REGISTER_FUNCTION(Overlay) +{ + factory.registerFunction>( + {.description = R"( +Replace a part of a string `input` with another string `replace`, starting at 1-based index `offset`. By default, the number of bytes removed from `input` equals the length of `replace`. If `length` (the optional fourth argument) is specified, a different number of bytes is removed. +)", + .categories{"String"}}, + FunctionFactory::Case::Insensitive); + + factory.registerFunction>( + {.description = R"( +Replace a part of a string `input` with another string `replace`, starting at 1-based index `offset`. By default, the number of characters removed from `input` equals the length of `replace`. If `length` (the optional fourth argument) is specified, a different number of characters is removed. + +Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. +)", + .categories{"String"}}, + FunctionFactory::Case::Sensitive); +} +} diff --git a/src/Functions/polygonsIntersection.cpp b/src/Functions/polygonsIntersection.cpp index 329242e762e..43ab03f8c1f 100644 --- a/src/Functions/polygonsIntersection.cpp +++ b/src/Functions/polygonsIntersection.cpp @@ -75,6 +75,8 @@ public: throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be Point", getName()); else if constexpr (std::is_same_v, LeftConverter> || std::is_same_v, RightConverter>) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be LineString", getName()); + else if constexpr (std::is_same_v, LeftConverter> || std::is_same_v, RightConverter>) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be MultiLineString", getName()); else { auto first = LeftConverter::convert(arguments[0].column->convertToFullColumnIfConst()); diff --git a/src/Functions/polygonsSymDifference.cpp b/src/Functions/polygonsSymDifference.cpp index 3c219d0facb..6faec95bb7b 100644 --- a/src/Functions/polygonsSymDifference.cpp +++ b/src/Functions/polygonsSymDifference.cpp @@ -73,6 +73,8 @@ public: throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be Point", getName()); else if constexpr (std::is_same_v, LeftConverter> || std::is_same_v, RightConverter>) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be LineString", getName()); + else if constexpr (std::is_same_v, LeftConverter> || std::is_same_v, RightConverter>) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be MultiLineString", getName()); else { auto first = LeftConverter::convert(arguments[0].column->convertToFullColumnIfConst()); diff --git a/src/Functions/polygonsUnion.cpp b/src/Functions/polygonsUnion.cpp index 969eb2f78fb..5378ff636f8 100644 --- a/src/Functions/polygonsUnion.cpp +++ b/src/Functions/polygonsUnion.cpp @@ -73,6 +73,8 @@ public: throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be Point", getName()); else if constexpr (std::is_same_v, LeftConverter> || std::is_same_v, RightConverter>) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be LineString", getName()); + else if constexpr (std::is_same_v, LeftConverter> || std::is_same_v, RightConverter>) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be MultiLineString", getName()); else { auto first = LeftConverter::convert(arguments[0].column->convertToFullColumnIfConst()); diff --git a/src/Functions/polygonsWithin.cpp b/src/Functions/polygonsWithin.cpp index c63ad5ef868..dacd1c0e18f 100644 --- a/src/Functions/polygonsWithin.cpp +++ b/src/Functions/polygonsWithin.cpp @@ -77,6 +77,8 @@ public: throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be Point", getName()); else if constexpr (std::is_same_v, LeftConverter> || std::is_same_v, RightConverter>) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be LineString", getName()); + else if constexpr (std::is_same_v, LeftConverter> || std::is_same_v, RightConverter>) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be MultiLineString", getName()); else { auto first = LeftConverter::convert(arguments[0].column->convertToFullColumnIfConst()); diff --git a/src/Functions/readWkt.cpp b/src/Functions/readWkt.cpp index eb262777b0d..2010b5167e7 100644 --- a/src/Functions/readWkt.cpp +++ b/src/Functions/readWkt.cpp @@ -87,6 +87,11 @@ struct ReadWKTLineStringNameHolder static constexpr const char * name = "readWKTLineString"; }; +struct ReadWKTMultiLineStringNameHolder +{ + static constexpr const char * name = "readWKTMultiLineString"; +}; + struct ReadWKTRingNameHolder { static constexpr const char * name = "readWKTRing"; @@ -131,6 +136,31 @@ Parses a Well-Known Text (WKT) representation of a LineString geometry and retur }, .categories{"Unique identifiers"} }); + factory.registerFunction, ReadWKTMultiLineStringNameHolder>>(FunctionDocumentation + { + .description=R"( +Parses a Well-Known Text (WKT) representation of a MultiLineString geometry and returns it in the internal ClickHouse format. +)", + .syntax = "readWKTMultiLineString(wkt_string)", + .arguments{ + {"wkt_string", "The input WKT string representing a MultiLineString geometry."} + }, + .returned_value = "The function returns a ClickHouse internal representation of the multilinestring geometry.", + .examples{ + {"first call", "SELECT readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3), (4 4, 5 5, 6 6))');", R"( +┌─readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3), (4 4, 5 5, 6 6))')─┐ +│ [[(1,1),(2,2),(3,3)],[(4,4),(5,5),(6,6)]] │ +└──────────────────────────────────────────────────────────────────────────────┘ + + )"}, + {"second call", "SELECT toTypeName(readWKTLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1))'));", R"( +┌─toTypeName(readWKTLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1))'))─┐ +│ MultiLineString │ +└─────────────────────────────────────────────────────────────────────────┘ + )"}, + }, + .categories{"Unique identifiers"} + }); factory.registerFunction, ReadWKTRingNameHolder>>(); factory.registerFunction, ReadWKTPolygonNameHolder>>(); factory.registerFunction, ReadWKTMultiPolygonNameHolder>>(); diff --git a/src/Functions/seriesPeriodDetectFFT.cpp b/src/Functions/seriesPeriodDetectFFT.cpp index 471354235d5..ecf8398bbd5 100644 --- a/src/Functions/seriesPeriodDetectFFT.cpp +++ b/src/Functions/seriesPeriodDetectFFT.cpp @@ -153,12 +153,8 @@ public: return true; } - std::vector xfreq(spec_len); double step = 0.5 / (spec_len - 1); - for (size_t i = 0; i < spec_len; ++i) - xfreq[i] = i * step; - - auto freq = xfreq[idx]; + auto freq = idx * step; period = std::round(1 / freq); return true; diff --git a/src/Functions/sleep.h b/src/Functions/sleep.h index 62ee19fa904..b6e4b36ee64 100644 --- a/src/Functions/sleep.h +++ b/src/Functions/sleep.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -32,6 +33,11 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } +namespace FailPoints +{ + extern const char infinite_sleep[]; +} + /** sleep(seconds) - the specified number of seconds sleeps each columns. */ @@ -107,6 +113,8 @@ public: { /// When sleeping, the query cannot be cancelled. For ability to cancel query, we limit sleep time. UInt64 microseconds = static_cast(seconds * 1e6); + FailPointInjection::pauseFailPoint(FailPoints::infinite_sleep); + if (max_microseconds && microseconds > max_microseconds) throw Exception(ErrorCodes::TOO_SLOW, "The maximum sleep time is {} microseconds. Requested: {} microseconds", max_microseconds, microseconds); diff --git a/src/Functions/toBool.cpp b/src/Functions/toBool.cpp index 6f2c436c1ea..ac595d313e3 100644 --- a/src/Functions/toBool.cpp +++ b/src/Functions/toBool.cpp @@ -54,8 +54,7 @@ namespace } }; - FunctionOverloadResolverPtr func_builder_cast = createInternalCastOverloadResolver(CastType::nonAccurate, {}); - auto func_cast = func_builder_cast->build(cast_args); + auto func_cast = createInternalCast(arguments[0], result_type, CastType::nonAccurate, {}); return func_cast->execute(cast_args, result_type, arguments[0].column->size()); } }; diff --git a/src/Functions/upperUTF8.cpp b/src/Functions/upperUTF8.cpp index 659e67f0ef3..ef26430331f 100644 --- a/src/Functions/upperUTF8.cpp +++ b/src/Functions/upperUTF8.cpp @@ -1,8 +1,10 @@ +#include "config.h" + +#if USE_ICU + +#include #include #include -#include -#include - namespace DB { @@ -14,13 +16,25 @@ struct NameUpperUTF8 static constexpr auto name = "upperUTF8"; }; -using FunctionUpperUTF8 = FunctionStringToString>, NameUpperUTF8>; +using FunctionUpperUTF8 = FunctionStringToString, NameUpperUTF8>; } REGISTER_FUNCTION(UpperUTF8) { - factory.registerFunction(); + FunctionDocumentation::Description description + = R"(Converts a string to lowercase, assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.)"; + FunctionDocumentation::Syntax syntax = "upperUTF8(input)"; + FunctionDocumentation::Arguments arguments = {{"input", "Input with String type"}}; + FunctionDocumentation::ReturnedValue returned_value = "A String data type value"; + FunctionDocumentation::Examples examples = { + {"first", "SELECT upperUTF8('München') as Upperutf8;", "MÜNCHEN"}, + }; + FunctionDocumentation::Categories categories = {"String"}; + + factory.registerFunction({description, syntax, arguments, returned_value, examples, categories}); } } + +#endif diff --git a/src/IO/HTTPCommon.cpp b/src/IO/HTTPCommon.cpp index 9704d034b2a..fcd96e97b4e 100644 --- a/src/IO/HTTPCommon.cpp +++ b/src/IO/HTTPCommon.cpp @@ -34,14 +34,20 @@ namespace ErrorCodes extern const int RECEIVED_ERROR_TOO_MANY_REQUESTS; } -void setResponseDefaultHeaders(HTTPServerResponse & response, size_t keep_alive_timeout) +void setResponseDefaultHeaders(HTTPServerResponse & response) { if (!response.getKeepAlive()) return; - Poco::Timespan timeout(keep_alive_timeout, 0); - if (timeout.totalSeconds()) - response.set("Keep-Alive", "timeout=" + std::to_string(timeout.totalSeconds())); + const size_t keep_alive_timeout = response.getSession().getKeepAliveTimeout(); + const size_t keep_alive_max_requests = response.getSession().getMaxKeepAliveRequests(); + if (keep_alive_timeout) + { + if (keep_alive_max_requests) + response.set("Keep-Alive", fmt::format("timeout={}, max={}", keep_alive_timeout, keep_alive_max_requests)); + else + response.set("Keep-Alive", fmt::format("timeout={}", keep_alive_timeout)); + } } HTTPSessionPtr makeHTTPSession( diff --git a/src/IO/HTTPCommon.h b/src/IO/HTTPCommon.h index 3a1fa5bebee..4d0580acaba 100644 --- a/src/IO/HTTPCommon.h +++ b/src/IO/HTTPCommon.h @@ -54,7 +54,7 @@ private: using HTTPSessionPtr = std::shared_ptr; -void setResponseDefaultHeaders(HTTPServerResponse & response, size_t keep_alive_timeout); +void setResponseDefaultHeaders(HTTPServerResponse & response); /// Create session object to perform requests and set required parameters. HTTPSessionPtr makeHTTPSession( diff --git a/src/IO/NetUtils.h b/src/IO/NetUtils.h new file mode 100644 index 00000000000..12f09524ae7 --- /dev/null +++ b/src/IO/NetUtils.h @@ -0,0 +1,58 @@ +#pragma once + +#include +#include + + +namespace DB +{ + +template +constexpr T netToHost(T value) noexcept +{ + if constexpr (std::endian::native != std::endian::big) + return std::byteswap(value); + return value; +} + +template +constexpr T hostToNet(T value) noexcept +{ + if constexpr (std::endian::native != std::endian::big) + return std::byteswap(value); + return value; +} + +template +constexpr T toLittleEndian(T value) noexcept +{ + if constexpr (std::endian::native == std::endian::big) + return std::byteswap(value); + return value; +} + +template +constexpr T toBigEndian(T value) noexcept +{ + if constexpr (std::endian::native != std::endian::big) + return std::byteswap(value); + return value; +} + +template +constexpr T fromLittleEndian(T value) noexcept +{ + if constexpr (std::endian::native == std::endian::big) + return std::byteswap(value); + return value; +} + +template +constexpr T fromBigEndian(T value) noexcept +{ + if constexpr (std::endian::native != std::endian::big) + return std::byteswap(value); + return value; +} + +} diff --git a/src/IO/Protobuf/ProtobufZeroCopyInputStreamFromReadBuffer.cpp b/src/IO/Protobuf/ProtobufZeroCopyInputStreamFromReadBuffer.cpp new file mode 100644 index 00000000000..86b7eb4d7f7 --- /dev/null +++ b/src/IO/Protobuf/ProtobufZeroCopyInputStreamFromReadBuffer.cpp @@ -0,0 +1,56 @@ +#include "config.h" + +#if USE_PROTOBUF +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +ProtobufZeroCopyInputStreamFromReadBuffer::ProtobufZeroCopyInputStreamFromReadBuffer(std::unique_ptr in_) : in(std::move(in_)) +{ +} + +ProtobufZeroCopyInputStreamFromReadBuffer::~ProtobufZeroCopyInputStreamFromReadBuffer() = default; + +bool ProtobufZeroCopyInputStreamFromReadBuffer::Next(const void ** data, int * size) +{ + if (in->eof()) + return false; + *data = in->position(); + *size = static_cast(in->available()); + in->position() += *size; + return true; +} + +void ProtobufZeroCopyInputStreamFromReadBuffer::BackUp(int count) +{ + if (static_cast(in->offset()) < count) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "ProtobufZeroCopyInputStreamFromReadBuffer::BackUp() cannot back up {} bytes (max = {} bytes)", + count, + in->offset()); + + in->position() -= count; +} + +bool ProtobufZeroCopyInputStreamFromReadBuffer::Skip(int count) +{ + return static_cast(in->tryIgnore(count)) == count; +} + +int64_t ProtobufZeroCopyInputStreamFromReadBuffer::ByteCount() const +{ + return in->count(); +} + +} + +#endif diff --git a/src/IO/Protobuf/ProtobufZeroCopyInputStreamFromReadBuffer.h b/src/IO/Protobuf/ProtobufZeroCopyInputStreamFromReadBuffer.h new file mode 100644 index 00000000000..3f86815ef3f --- /dev/null +++ b/src/IO/Protobuf/ProtobufZeroCopyInputStreamFromReadBuffer.h @@ -0,0 +1,38 @@ +#pragma once + +#include "config.h" +#if USE_PROTOBUF + +#include + + +namespace DB +{ +class ReadBuffer; + +class ProtobufZeroCopyInputStreamFromReadBuffer : public google::protobuf::io::ZeroCopyInputStream +{ +public: + explicit ProtobufZeroCopyInputStreamFromReadBuffer(std::unique_ptr in_); + ~ProtobufZeroCopyInputStreamFromReadBuffer() override; + + // Obtains a chunk of data from the stream. + bool Next(const void ** data, int * size) override; + + // Backs up a number of bytes, so that the next call to Next() returns + // data again that was already returned by the last call to Next(). + void BackUp(int count) override; + + // Skips a number of bytes. + bool Skip(int count) override; + + // Returns the total number of bytes read since this object was created. + int64_t ByteCount() const override; + +private: + std::unique_ptr in; +}; + +} + +#endif diff --git a/src/IO/Protobuf/ProtobufZeroCopyOutputStreamFromWriteBuffer.cpp b/src/IO/Protobuf/ProtobufZeroCopyOutputStreamFromWriteBuffer.cpp new file mode 100644 index 00000000000..d1e02b436f3 --- /dev/null +++ b/src/IO/Protobuf/ProtobufZeroCopyOutputStreamFromWriteBuffer.cpp @@ -0,0 +1,60 @@ +#include "config.h" + +#if USE_PROTOBUF +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +ProtobufZeroCopyOutputStreamFromWriteBuffer::ProtobufZeroCopyOutputStreamFromWriteBuffer(WriteBuffer & out_) : out(&out_) +{ +} + +ProtobufZeroCopyOutputStreamFromWriteBuffer::ProtobufZeroCopyOutputStreamFromWriteBuffer(std::unique_ptr out_) + : ProtobufZeroCopyOutputStreamFromWriteBuffer(*out_) +{ + out_holder = std::move(out_); +} + +ProtobufZeroCopyOutputStreamFromWriteBuffer::~ProtobufZeroCopyOutputStreamFromWriteBuffer() = default; + +bool ProtobufZeroCopyOutputStreamFromWriteBuffer::Next(void ** data, int * size) +{ + *data = out->position(); + *size = static_cast(out->available()); + out->position() += *size; + return true; +} + +void ProtobufZeroCopyOutputStreamFromWriteBuffer::BackUp(int count) +{ + if (static_cast(out->offset()) < count) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "ProtobufZeroCopyOutputStreamFromWriteBuffer::BackUp() cannot back up {} bytes (max = {} bytes)", + count, + out->offset()); + + out->position() -= count; +} + +int64_t ProtobufZeroCopyOutputStreamFromWriteBuffer::ByteCount() const +{ + return out->count(); +} + +void ProtobufZeroCopyOutputStreamFromWriteBuffer::finalize() +{ + out->finalize(); +} + +} + +#endif diff --git a/src/IO/Protobuf/ProtobufZeroCopyOutputStreamFromWriteBuffer.h b/src/IO/Protobuf/ProtobufZeroCopyOutputStreamFromWriteBuffer.h new file mode 100644 index 00000000000..c47cef9ff4d --- /dev/null +++ b/src/IO/Protobuf/ProtobufZeroCopyOutputStreamFromWriteBuffer.h @@ -0,0 +1,40 @@ +#pragma once + +#include "config.h" +#if USE_PROTOBUF + +#include + + +namespace DB +{ +class WriteBuffer; + +class ProtobufZeroCopyOutputStreamFromWriteBuffer : public google::protobuf::io::ZeroCopyOutputStream +{ +public: + explicit ProtobufZeroCopyOutputStreamFromWriteBuffer(WriteBuffer & out_); + explicit ProtobufZeroCopyOutputStreamFromWriteBuffer(std::unique_ptr out_); + + ~ProtobufZeroCopyOutputStreamFromWriteBuffer() override; + + // Obtains a buffer into which data can be written. + bool Next(void ** data, int * size) override; + + // Backs up a number of bytes, so that the end of the last buffer returned + // by Next() is not actually written. + void BackUp(int count) override; + + // Returns the total number of bytes written since this object was created. + int64_t ByteCount() const override; + + void finalize(); + +private: + WriteBuffer * out; + std::unique_ptr out_holder; +}; + +} + +#endif diff --git a/src/IO/ReadBufferFromPocoSocket.cpp b/src/IO/ReadBufferFromPocoSocket.cpp index 26cdee4140c..bbf9f96404f 100644 --- a/src/IO/ReadBufferFromPocoSocket.cpp +++ b/src/IO/ReadBufferFromPocoSocket.cpp @@ -32,7 +32,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -bool ReadBufferFromPocoSocket::nextImpl() +ssize_t ReadBufferFromPocoSocketBase::socketReceiveBytesImpl(char * ptr, size_t size) { ssize_t bytes_read = 0; Stopwatch watch; @@ -43,14 +43,11 @@ bool ReadBufferFromPocoSocket::nextImpl() ProfileEvents::increment(ProfileEvents::NetworkReceiveBytes, bytes_read); }); + CurrentMetrics::Increment metric_increment(CurrentMetrics::NetworkReceive); + /// Add more details to exceptions. try { - CurrentMetrics::Increment metric_increment(CurrentMetrics::NetworkReceive); - - if (internal_buffer.size() > INT_MAX) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Buffer overflow"); - /// If async_callback is specified, set socket to non-blocking mode /// and try to read data from it, if socket is not ready for reading, /// run async_callback and try again later. @@ -61,7 +58,7 @@ bool ReadBufferFromPocoSocket::nextImpl() socket.setBlocking(false); SCOPE_EXIT(socket.setBlocking(true)); bool secure = socket.secure(); - bytes_read = socket.impl()->receiveBytes(internal_buffer.begin(), static_cast(internal_buffer.size())); + bytes_read = socket.impl()->receiveBytes(ptr, static_cast(size)); /// Check EAGAIN and ERR_SSL_WANT_READ/ERR_SSL_WANT_WRITE for secure socket (reading from secure socket can write too). while (bytes_read < 0 && (errno == EAGAIN || (secure && (checkSSLWantRead(bytes_read) || checkSSLWantWrite(bytes_read))))) @@ -73,12 +70,12 @@ bool ReadBufferFromPocoSocket::nextImpl() async_callback(socket.impl()->sockfd(), socket.getReceiveTimeout(), AsyncEventTimeoutType::RECEIVE, socket_description, AsyncTaskExecutor::Event::READ | AsyncTaskExecutor::Event::ERROR); /// Try to read again. - bytes_read = socket.impl()->receiveBytes(internal_buffer.begin(), static_cast(internal_buffer.size())); + bytes_read = socket.impl()->receiveBytes(ptr, static_cast(size)); } } else { - bytes_read = socket.impl()->receiveBytes(internal_buffer.begin(), static_cast(internal_buffer.size())); + bytes_read = socket.impl()->receiveBytes(ptr, static_cast(size)); } } catch (const Poco::Net::NetException & e) @@ -99,6 +96,16 @@ bool ReadBufferFromPocoSocket::nextImpl() if (bytes_read < 0) throw NetException(ErrorCodes::CANNOT_READ_FROM_SOCKET, "Cannot read from socket (peer: {}, local: {})", peer_address.toString(), socket.address().toString()); + return bytes_read; +} + +bool ReadBufferFromPocoSocketBase::nextImpl() +{ + if (internal_buffer.size() > INT_MAX) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Buffer overflow"); + + ssize_t bytes_read = socketReceiveBytesImpl(internal_buffer.begin(), internal_buffer.size()); + if (read_event != ProfileEvents::end()) ProfileEvents::increment(read_event, bytes_read); @@ -110,7 +117,7 @@ bool ReadBufferFromPocoSocket::nextImpl() return true; } -ReadBufferFromPocoSocket::ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, size_t buf_size) +ReadBufferFromPocoSocketBase::ReadBufferFromPocoSocketBase(Poco::Net::Socket & socket_, size_t buf_size) : BufferWithOwnMemory(buf_size) , socket(socket_) , peer_address(socket.peerAddress()) @@ -119,19 +126,22 @@ ReadBufferFromPocoSocket::ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, { } -ReadBufferFromPocoSocket::ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, const ProfileEvents::Event & read_event_, size_t buf_size) - : ReadBufferFromPocoSocket(socket_, buf_size) +ReadBufferFromPocoSocketBase::ReadBufferFromPocoSocketBase(Poco::Net::Socket & socket_, const ProfileEvents::Event & read_event_, size_t buf_size) + : ReadBufferFromPocoSocketBase(socket_, buf_size) { read_event = read_event_; } -bool ReadBufferFromPocoSocket::poll(size_t timeout_microseconds) const +bool ReadBufferFromPocoSocketBase::poll(size_t timeout_microseconds) const { - if (available()) + /// For secure socket it is important to check if any remaining data available in underlying decryption buffer - + /// read always retrieves the whole encrypted frame from the wire and puts it into underlying buffer while returning only requested size - + /// further poll() can block though there is still data to read in the underlying decryption buffer. + if (available() || socket.impl()->available()) return true; Stopwatch watch; - bool res = socket.poll(timeout_microseconds, Poco::Net::Socket::SELECT_READ | Poco::Net::Socket::SELECT_ERROR); + bool res = socket.impl()->poll(timeout_microseconds, Poco::Net::Socket::SELECT_READ | Poco::Net::Socket::SELECT_ERROR); ProfileEvents::increment(ProfileEvents::NetworkReceiveElapsedMicroseconds, watch.elapsedMicroseconds()); return res; } diff --git a/src/IO/ReadBufferFromPocoSocket.h b/src/IO/ReadBufferFromPocoSocket.h index 76156612764..912388adaac 100644 --- a/src/IO/ReadBufferFromPocoSocket.h +++ b/src/IO/ReadBufferFromPocoSocket.h @@ -9,7 +9,7 @@ namespace DB { /// Works with the ready Poco::Net::Socket. Blocking operations. -class ReadBufferFromPocoSocket : public BufferWithOwnMemory +class ReadBufferFromPocoSocketBase : public BufferWithOwnMemory { protected: Poco::Net::Socket & socket; @@ -25,16 +25,29 @@ protected: bool nextImpl() override; public: - explicit ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE); - explicit ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, const ProfileEvents::Event & read_event_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE); + explicit ReadBufferFromPocoSocketBase(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE); + explicit ReadBufferFromPocoSocketBase(Poco::Net::Socket & socket_, const ProfileEvents::Event & read_event_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE); bool poll(size_t timeout_microseconds) const; void setAsyncCallback(AsyncCallback async_callback_) { async_callback = std::move(async_callback_); } + ssize_t socketReceiveBytesImpl(char * ptr, size_t size); + private: AsyncCallback async_callback; std::string socket_description; }; +class ReadBufferFromPocoSocket : public ReadBufferFromPocoSocketBase +{ +public: + explicit ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE) + : ReadBufferFromPocoSocketBase(socket_, buf_size) + {} + explicit ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, const ProfileEvents::Event & read_event_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE) + : ReadBufferFromPocoSocketBase(socket_, read_event_, buf_size) + {} +}; + } diff --git a/src/IO/ReadBufferFromPocoSocketChunked.cpp b/src/IO/ReadBufferFromPocoSocketChunked.cpp new file mode 100644 index 00000000000..4a1e3732a55 --- /dev/null +++ b/src/IO/ReadBufferFromPocoSocketChunked.cpp @@ -0,0 +1,166 @@ +#include +#include +#include + + +namespace DB::ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +namespace DB +{ + +ReadBufferFromPocoSocketChunked::ReadBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, size_t buf_size) + : ReadBufferFromPocoSocketChunked(socket_, ProfileEvents::end(), buf_size) +{} + +ReadBufferFromPocoSocketChunked::ReadBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, const ProfileEvents::Event & read_event_, size_t buf_size) + : ReadBufferFromPocoSocketBase( + socket_, read_event_, + std::min(buf_size, static_cast(std::numeric_limits::max()))), + our_address(socket_.address()), log(getLogger("Protocol")) +{} + +void ReadBufferFromPocoSocketChunked::enableChunked() +{ + if (chunked) + return; + chunked = 1; + data_end = buffer().end(); + /// Resize working buffer so any next read will call nextImpl + working_buffer.resize(offset()); + chunk_left = 0; + next_chunk = 0; +} + +bool ReadBufferFromPocoSocketChunked::hasBufferedData() const +{ + if (available()) + return true; + + return chunked && (static_cast(data_end - working_buffer.end()) > sizeof(next_chunk)); +} + +bool ReadBufferFromPocoSocketChunked::poll(size_t timeout_microseconds) const +{ + if (chunked) + if (available() || static_cast(data_end - working_buffer.end()) > sizeof(next_chunk)) + return true; + + return ReadBufferFromPocoSocketBase::poll(timeout_microseconds); +} + + +bool ReadBufferFromPocoSocketChunked::loadNextChunk(Position c_pos, bool cont) +{ + auto buffered = std::min(static_cast(data_end - c_pos), sizeof(next_chunk)); + + if (buffered) + std::memcpy(&next_chunk, c_pos, buffered); + if (buffered < sizeof(next_chunk)) + if (socketReceiveBytesImpl(reinterpret_cast(&next_chunk) + buffered, sizeof(next_chunk) - buffered) < static_cast(sizeof(next_chunk) - buffered)) + return false; + next_chunk = fromLittleEndian(next_chunk); + + if (next_chunk) + { + if (cont) + LOG_TEST(log, "{} <- {} Chunk receive continued. Size {}", ourAddress().toString(), peerAddress().toString(), next_chunk); + } + else + LOG_TEST(log, "{} <- {} Chunk receive ended.", ourAddress().toString(), peerAddress().toString()); + + return true; +} + +bool ReadBufferFromPocoSocketChunked::processChunkLeft(Position c_pos) +{ + if (data_end - c_pos < chunk_left) + { + working_buffer.resize(data_end - buffer().begin()); + nextimpl_working_buffer_offset = c_pos - buffer().begin(); + chunk_left -= (data_end - c_pos); + return true; + } + + nextimpl_working_buffer_offset = c_pos - buffer().begin(); + working_buffer.resize(nextimpl_working_buffer_offset + chunk_left); + + c_pos += chunk_left; + + if (!loadNextChunk(c_pos, true)) + return false; + + chunk_left = 0; + return true; +} + + +bool ReadBufferFromPocoSocketChunked::nextImpl() +{ + if (!chunked) + return ReadBufferFromPocoSocketBase::nextImpl(); + + auto * c_pos = pos; + + if (chunk_left == 0) + { + if (next_chunk == 0) + { + if (chunked == 1) + chunked = 2; // first chunked block - no end marker + else + c_pos = pos + sizeof(next_chunk); // bypass chunk end marker + + if (c_pos > data_end) + c_pos = data_end; + + if (!loadNextChunk(c_pos)) + return false; + + chunk_left = next_chunk; + next_chunk = 0; + + if (chunk_left == 0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Native protocol: empty chunk received"); + + c_pos += sizeof(next_chunk); + + if (c_pos >= data_end) + { + if (!ReadBufferFromPocoSocketBase::nextImpl()) + return false; + data_end = buffer().end(); + c_pos = buffer().begin(); + } + + LOG_TEST(log, "{} <- {} Chunk receive started. Message {}, size {}", ourAddress().toString(), peerAddress().toString(), static_cast(*c_pos), chunk_left); + } + else + { + c_pos += sizeof(next_chunk); + if (c_pos >= data_end) + { + if (!ReadBufferFromPocoSocketBase::nextImpl()) + return false; + data_end = buffer().end(); + c_pos = buffer().begin(); + } + + chunk_left = next_chunk; + next_chunk = 0; + } + } + else + { + if (!ReadBufferFromPocoSocketBase::nextImpl()) + return false; + data_end = buffer().end(); + c_pos = buffer().begin(); + } + + return processChunkLeft(c_pos); +} + +} diff --git a/src/IO/ReadBufferFromPocoSocketChunked.h b/src/IO/ReadBufferFromPocoSocketChunked.h new file mode 100644 index 00000000000..8bc4024b978 --- /dev/null +++ b/src/IO/ReadBufferFromPocoSocketChunked.h @@ -0,0 +1,109 @@ +#pragma once + +#include +#include + +/* + +Handshake +============= + | 'Hello' type + | handshake exchange + | chunked protocol negotiation + +============= + + +Basic chunk: + +============= +Chunk begins | 0x12345678 chunk size, 4 bytes little endian + +------------- + | Packet type always follows beginning of the chunk + | packet data + +------------- +Chunk ends | 0x00000000 4 zero bytes + +============= + + +Datastream chunk: + +============= +Chunk begins | 0x12345678 + +------------- + | Packet type + | packet data + +------------- + | Packet type + | packet data + +------------- +...arbitrary number ..... +of packets... ..... + +------------- + | Packet type + | packet data + +------------- +Chunk ends | 0x00000000 + +============= + + +Multipart chunk: + +============= +Chunk begins | 0x12345678 chunk part size, 4 bytes little endian + +------------- + | Packet type + | packet data + +------------- + | Packet type + | (partial) packet data + +============= +Chunk continues | 0x12345678 chunk next part size, 4 bytes little endian + +============= + | possibly previous packet's data + +------------- + | Packet type + | packet data + +------------- +...arbitrary number ..... +of chunk parts... ..... + +------------- + | Packet type + | packet data + +------------- +Chunk ends | 0x00000000 + +============= + +*/ + +namespace DB +{ + +class ReadBufferFromPocoSocketChunked: public ReadBufferFromPocoSocketBase +{ +public: + using ReadBufferFromPocoSocketBase::setAsyncCallback; + + explicit ReadBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE); + explicit ReadBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, const ProfileEvents::Event & read_event_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE); + + void enableChunked(); + + bool hasBufferedData() const; + + bool poll(size_t timeout_microseconds) const; + + Poco::Net::SocketAddress peerAddress() { return peer_address; } + Poco::Net::SocketAddress ourAddress() { return our_address; } + +protected: + bool loadNextChunk(Position c_pos, bool cont = false); + bool processChunkLeft(Position c_pos); + bool nextImpl() override; + + Poco::Net::SocketAddress our_address; + +private: + LoggerPtr log; + Position data_end = nullptr; // end position of data in the internal_buffer + UInt32 chunk_left = 0; // chunk left to read from socket + UInt32 next_chunk = 0; // size of the next cnunk + UInt8 chunked = 0; // 0 - disabled; 1 - started; 2 - enabled; +}; + +} diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index 9559462e62b..a38dc1ecefb 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include @@ -855,6 +856,12 @@ void readBackQuotedString(String & s, ReadBuffer & buf) readBackQuotedStringInto(s, buf); } +bool tryReadBackQuotedString(String & s, ReadBuffer & buf) +{ + s.clear(); + return readAnyQuotedStringInto<'`', false, String, bool>(s, buf); +} + void readBackQuotedStringWithSQLStyle(String & s, ReadBuffer & buf) { s.clear(); @@ -1270,8 +1277,83 @@ ReturnType readJSONArrayInto(Vector & s, ReadBuffer & buf) template void readJSONArrayInto, void>(PaddedPODArray & s, ReadBuffer & buf); template bool readJSONArrayInto, bool>(PaddedPODArray & s, ReadBuffer & buf); +std::string_view readJSONObjectAsViewPossiblyInvalid(ReadBuffer & buf, String & object_buffer) +{ + if (buf.eof() || *buf.position() != '{') + throw Exception(ErrorCodes::INCORRECT_DATA, "JSON object should start with '{{'"); + + char * start = buf.position(); + bool use_object_buffer = false; + object_buffer.clear(); + + ++buf.position(); + Int64 balance = 1; + bool quotes = false; + + while (true) + { + if (!buf.hasPendingData() && !use_object_buffer) + { + use_object_buffer = true; + object_buffer.append(start, buf.position() - start); + } + + if (buf.eof()) + throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected EOF while reading JSON object"); + + char * next_pos = find_first_symbols<'\\', '{', '}', '"'>(buf.position(), buf.buffer().end()); + if (use_object_buffer) + object_buffer.append(buf.position(), next_pos - buf.position()); + buf.position() = next_pos; + + if (!buf.hasPendingData()) + continue; + + if (use_object_buffer) + object_buffer.push_back(*buf.position()); + + if (*buf.position() == '\\') + { + ++buf.position(); + if (!buf.hasPendingData() && !use_object_buffer) + { + use_object_buffer = true; + object_buffer.append(start, buf.position() - start); + } + + if (buf.eof()) + throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected EOF while reading JSON object"); + + if (use_object_buffer) + object_buffer.push_back(*buf.position()); + ++buf.position(); + + continue; + } + + if (*buf.position() == '"') + quotes = !quotes; + else if (!quotes) // can be only opening_bracket or closing_bracket + balance += *buf.position() == '{' ? 1 : -1; + + ++buf.position(); + + if (balance == 0) + { + if (use_object_buffer) + return object_buffer; + return {start, buf.position()}; + } + + if (balance < 0) + break; + } + + throw Exception(ErrorCodes::INCORRECT_DATA, "JSON object should have equal number of opening and closing brackets"); +} + template -ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf) +ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf, const char * allowed_delimiters) { static constexpr bool throw_exception = std::is_same_v; @@ -1318,6 +1400,9 @@ ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf) } else { + if (!isSymbolIn(*buf.position(), allowed_delimiters)) + return error(); + ++buf.position(); if (!append_digit(month)) @@ -1325,7 +1410,11 @@ ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf) append_digit(month); if (!buf.eof() && !isNumericASCII(*buf.position())) + { + if (!isSymbolIn(*buf.position(), allowed_delimiters)) + return error(); ++buf.position(); + } else return error(); @@ -1338,12 +1427,12 @@ ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf) return ReturnType(true); } -template void readDateTextFallback(LocalDate &, ReadBuffer &); -template bool readDateTextFallback(LocalDate &, ReadBuffer &); +template void readDateTextFallback(LocalDate &, ReadBuffer &, const char * allowed_delimiters); +template bool readDateTextFallback(LocalDate &, ReadBuffer &, const char * allowed_delimiters); template -ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut) +ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut, const char * allowed_date_delimiters, const char * allowed_time_delimiters) { static constexpr bool throw_exception = std::is_same_v; @@ -1400,10 +1489,8 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D size_t size = buf.read(s_pos, remaining_date_size); if (size != remaining_date_size) { - s_pos[size] = 0; - if constexpr (throw_exception) - throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse DateTime {}", s); + throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse DateTime {}", std::string_view(s, already_read_length + size)); else return false; } @@ -1413,6 +1500,9 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D if (!isNumericASCII(s[0]) || !isNumericASCII(s[1]) || !isNumericASCII(s[2]) || !isNumericASCII(s[3]) || !isNumericASCII(s[5]) || !isNumericASCII(s[6]) || !isNumericASCII(s[8]) || !isNumericASCII(s[9])) return false; + + if (!isSymbolIn(s[4], allowed_date_delimiters) || !isSymbolIn(s[7], allowed_date_delimiters)) + return false; } UInt16 year = (s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0'); @@ -1430,10 +1520,8 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D if (size != time_broken_down_length) { - s_pos[size] = 0; - if constexpr (throw_exception) - throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse time component of DateTime {}", s); + throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse time component of DateTime {}", std::string_view(s, size)); else return false; } @@ -1443,6 +1531,9 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D if (!isNumericASCII(s[0]) || !isNumericASCII(s[1]) || !isNumericASCII(s[3]) || !isNumericASCII(s[4]) || !isNumericASCII(s[6]) || !isNumericASCII(s[7])) return false; + + if (!isSymbolIn(s[2], allowed_time_delimiters) || !isSymbolIn(s[5], allowed_time_delimiters)) + return false; } hour = (s[0] - '0') * 10 + (s[1] - '0'); @@ -1488,10 +1579,10 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D return ReturnType(true); } -template void readDateTimeTextFallback(time_t &, ReadBuffer &, const DateLUTImpl &); -template void readDateTimeTextFallback(time_t &, ReadBuffer &, const DateLUTImpl &); -template bool readDateTimeTextFallback(time_t &, ReadBuffer &, const DateLUTImpl &); -template bool readDateTimeTextFallback(time_t &, ReadBuffer &, const DateLUTImpl &); +template void readDateTimeTextFallback(time_t &, ReadBuffer &, const DateLUTImpl &, const char *, const char *); +template void readDateTimeTextFallback(time_t &, ReadBuffer &, const DateLUTImpl &, const char *, const char *); +template bool readDateTimeTextFallback(time_t &, ReadBuffer &, const DateLUTImpl &, const char *, const char *); +template bool readDateTimeTextFallback(time_t &, ReadBuffer &, const DateLUTImpl &, const char *, const char *); template @@ -1905,6 +1996,11 @@ static ReturnType readParsedValueInto(Vector & s, ReadBuffer & buf, ParseFunc pa return ReturnType(true); } +void readParsedValueIntoString(String & s, ReadBuffer & buf, std::function parse_func) +{ + readParsedValueInto(s, buf, std::move(parse_func)); +} + template static ReturnType readQuotedStringFieldInto(Vector & s, ReadBuffer & buf) { diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index ffba4fafb5c..05198361ca2 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -258,6 +258,20 @@ inline void readBoolText(bool & x, ReadBuffer & buf) char tmp = '0'; readChar(tmp, buf); x = tmp != '0'; + + if (!buf.eof() && isAlphaASCII(tmp)) + { + if (tmp == 't' || tmp == 'T') + { + assertStringCaseInsensitive("rue", buf); + x = true; + } + else if (tmp == 'f' || tmp == 'F') + { + assertStringCaseInsensitive("alse", buf); + x = false; + } + } } template @@ -600,6 +614,7 @@ bool tryReadDoubleQuotedStringWithSQLStyle(String & s, ReadBuffer & buf); void readJSONString(String & s, ReadBuffer & buf, const FormatSettings::JSON & settings); void readBackQuotedString(String & s, ReadBuffer & buf); +bool tryReadBackQuotedString(String & s, ReadBuffer & buf); void readBackQuotedStringWithSQLStyle(String & s, ReadBuffer & buf); void readStringUntilEOF(String & s, ReadBuffer & buf); @@ -687,6 +702,10 @@ ReturnType readJSONObjectPossiblyInvalid(Vector & s, ReadBuffer & buf); template ReturnType readJSONArrayInto(Vector & s, ReadBuffer & buf); +/// Similar to readJSONObjectPossiblyInvalid but avoids copying the data if JSON object fits into current read buffer +/// If copying is unavoidable, it copies data into provided object_buffer and returns string_view to it. +std::string_view readJSONObjectAsViewPossiblyInvalid(ReadBuffer & buf, String & object_buffer); + template void readStringUntilWhitespaceInto(Vector & s, ReadBuffer & buf); @@ -703,13 +722,28 @@ struct NullOutput }; template -ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf); +ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf, const char * allowed_delimiters); + +inline bool isSymbolIn(char symbol, const char * symbols) +{ + if (symbols == nullptr) + return true; + + const char * pos = symbols; + while (*pos) + { + if (*pos == symbol) + return true; + ++pos; + } + return false; +} /// In YYYY-MM-DD format. /// For convenience, Month and Day parts can have single digit instead of two digits. /// Any separators other than '-' are supported. template -inline ReturnType readDateTextImpl(LocalDate & date, ReadBuffer & buf) +inline ReturnType readDateTextImpl(LocalDate & date, ReadBuffer & buf, const char * allowed_delimiters = nullptr) { static constexpr bool throw_exception = std::is_same_v; @@ -753,6 +787,9 @@ inline ReturnType readDateTextImpl(LocalDate & date, ReadBuffer & buf) } else { + if (!isSymbolIn(pos[-1], allowed_delimiters)) + return error(); + if (!isNumericASCII(pos[0])) return error(); @@ -768,6 +805,9 @@ inline ReturnType readDateTextImpl(LocalDate & date, ReadBuffer & buf) if (isNumericASCII(pos[-1]) || !isNumericASCII(pos[0])) return error(); + if (!isSymbolIn(pos[-1], allowed_delimiters)) + return error(); + day = pos[0] - '0'; if (isNumericASCII(pos[1])) { @@ -783,7 +823,7 @@ inline ReturnType readDateTextImpl(LocalDate & date, ReadBuffer & buf) return ReturnType(true); } else - return readDateTextFallback(date, buf); + return readDateTextFallback(date, buf, allowed_delimiters); } inline void convertToDayNum(DayNum & date, ExtendedDayNum & from) @@ -797,15 +837,15 @@ inline void convertToDayNum(DayNum & date, ExtendedDayNum & from) } template -inline ReturnType readDateTextImpl(DayNum & date, ReadBuffer & buf, const DateLUTImpl & date_lut) +inline ReturnType readDateTextImpl(DayNum & date, ReadBuffer & buf, const DateLUTImpl & date_lut, const char * allowed_delimiters = nullptr) { static constexpr bool throw_exception = std::is_same_v; LocalDate local_date; if constexpr (throw_exception) - readDateTextImpl(local_date, buf); - else if (!readDateTextImpl(local_date, buf)) + readDateTextImpl(local_date, buf, allowed_delimiters); + else if (!readDateTextImpl(local_date, buf, allowed_delimiters)) return false; ExtendedDayNum ret = date_lut.makeDayNum(local_date.year(), local_date.month(), local_date.day()); @@ -814,15 +854,15 @@ inline ReturnType readDateTextImpl(DayNum & date, ReadBuffer & buf, const DateLU } template -inline ReturnType readDateTextImpl(ExtendedDayNum & date, ReadBuffer & buf, const DateLUTImpl & date_lut) +inline ReturnType readDateTextImpl(ExtendedDayNum & date, ReadBuffer & buf, const DateLUTImpl & date_lut, const char * allowed_delimiters = nullptr) { static constexpr bool throw_exception = std::is_same_v; LocalDate local_date; if constexpr (throw_exception) - readDateTextImpl(local_date, buf); - else if (!readDateTextImpl(local_date, buf)) + readDateTextImpl(local_date, buf, allowed_delimiters); + else if (!readDateTextImpl(local_date, buf, allowed_delimiters)) return false; /// When the parameter is out of rule or out of range, Date32 uses 1925-01-01 as the default value (-DateLUT::instance().getDayNumOffsetEpoch(), -16436) and Date uses 1970-01-01. @@ -846,19 +886,19 @@ inline void readDateText(ExtendedDayNum & date, ReadBuffer & buf, const DateLUTI readDateTextImpl(date, buf, date_lut); } -inline bool tryReadDateText(LocalDate & date, ReadBuffer & buf) +inline bool tryReadDateText(LocalDate & date, ReadBuffer & buf, const char * allowed_delimiters = nullptr) { - return readDateTextImpl(date, buf); + return readDateTextImpl(date, buf, allowed_delimiters); } -inline bool tryReadDateText(DayNum & date, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance()) +inline bool tryReadDateText(DayNum & date, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance(), const char * allowed_delimiters = nullptr) { - return readDateTextImpl(date, buf, time_zone); + return readDateTextImpl(date, buf, time_zone, allowed_delimiters); } -inline bool tryReadDateText(ExtendedDayNum & date, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance()) +inline bool tryReadDateText(ExtendedDayNum & date, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance(), const char * allowed_delimiters = nullptr) { - return readDateTextImpl(date, buf, time_zone); + return readDateTextImpl(date, buf, time_zone, allowed_delimiters); } UUID parseUUID(std::span src); @@ -975,13 +1015,13 @@ inline T parseFromString(std::string_view str) template -ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut); +ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut, const char * allowed_date_delimiters = nullptr, const char * allowed_time_delimiters = nullptr); /** In YYYY-MM-DD hh:mm:ss or YYYY-MM-DD format, according to specified time zone. * As an exception, also supported parsing of unix timestamp in form of decimal number. */ template -inline ReturnType readDateTimeTextImpl(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut) +inline ReturnType readDateTimeTextImpl(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut, const char * allowed_date_delimiters = nullptr, const char * allowed_time_delimiters = nullptr) { static constexpr bool throw_exception = std::is_same_v; @@ -1014,6 +1054,9 @@ inline ReturnType readDateTimeTextImpl(time_t & datetime, ReadBuffer & buf, cons if (!isNumericASCII(s[0]) || !isNumericASCII(s[1]) || !isNumericASCII(s[2]) || !isNumericASCII(s[3]) || !isNumericASCII(s[5]) || !isNumericASCII(s[6]) || !isNumericASCII(s[8]) || !isNumericASCII(s[9])) return ReturnType(false); + + if (!isSymbolIn(s[4], allowed_date_delimiters) || !isSymbolIn(s[7], allowed_date_delimiters)) + return ReturnType(false); } UInt16 year = (s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0'); @@ -1033,6 +1076,9 @@ inline ReturnType readDateTimeTextImpl(time_t & datetime, ReadBuffer & buf, cons if (!isNumericASCII(s[11]) || !isNumericASCII(s[12]) || !isNumericASCII(s[14]) || !isNumericASCII(s[15]) || !isNumericASCII(s[17]) || !isNumericASCII(s[18])) return ReturnType(false); + + if (!isSymbolIn(s[13], allowed_time_delimiters) || !isSymbolIn(s[16], allowed_time_delimiters)) + return ReturnType(false); } hour = (s[11] - '0') * 10 + (s[12] - '0'); @@ -1057,11 +1103,11 @@ inline ReturnType readDateTimeTextImpl(time_t & datetime, ReadBuffer & buf, cons return readIntTextImpl(datetime, buf); } else - return readDateTimeTextFallback(datetime, buf, date_lut); + return readDateTimeTextFallback(datetime, buf, date_lut, allowed_date_delimiters, allowed_time_delimiters); } template -inline ReturnType readDateTimeTextImpl(DateTime64 & datetime64, UInt32 scale, ReadBuffer & buf, const DateLUTImpl & date_lut) +inline ReturnType readDateTimeTextImpl(DateTime64 & datetime64, UInt32 scale, ReadBuffer & buf, const DateLUTImpl & date_lut, const char * allowed_date_delimiters = nullptr, const char * allowed_time_delimiters = nullptr) { static constexpr bool throw_exception = std::is_same_v; @@ -1075,7 +1121,7 @@ inline ReturnType readDateTimeTextImpl(DateTime64 & datetime64, UInt32 scale, Re { try { - readDateTimeTextImpl(whole, buf, date_lut); + readDateTimeTextImpl(whole, buf, date_lut, allowed_date_delimiters, allowed_time_delimiters); } catch (const DB::Exception &) { @@ -1085,7 +1131,7 @@ inline ReturnType readDateTimeTextImpl(DateTime64 & datetime64, UInt32 scale, Re } else { - auto ok = readDateTimeTextImpl(whole, buf, date_lut); + auto ok = readDateTimeTextImpl(whole, buf, date_lut, allowed_date_delimiters, allowed_time_delimiters); if (!ok && (buf.eof() || *buf.position() != '.')) return ReturnType(false); } @@ -1168,14 +1214,14 @@ inline void readDateTime64Text(DateTime64 & datetime64, UInt32 scale, ReadBuffer readDateTimeTextImpl(datetime64, scale, buf, date_lut); } -inline bool tryReadDateTimeText(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance()) +inline bool tryReadDateTimeText(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance(), const char * allowed_date_delimiters = nullptr, const char * allowed_time_delimiters = nullptr) { - return readDateTimeTextImpl(datetime, buf, time_zone); + return readDateTimeTextImpl(datetime, buf, time_zone, allowed_date_delimiters, allowed_time_delimiters); } -inline bool tryReadDateTime64Text(DateTime64 & datetime64, UInt32 scale, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance()) +inline bool tryReadDateTime64Text(DateTime64 & datetime64, UInt32 scale, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance(), const char * allowed_date_delimiters = nullptr, const char * allowed_time_delimiters = nullptr) { - return readDateTimeTextImpl(datetime64, scale, buf, date_lut); + return readDateTimeTextImpl(datetime64, scale, buf, date_lut, allowed_date_delimiters, allowed_time_delimiters); } inline void readDateTimeText(LocalDateTime & datetime, ReadBuffer & buf) @@ -1708,6 +1754,7 @@ inline T parse(const char * data, size_t size) T res; ReadBufferFromMemory buf(data, size); readText(res, buf); + assertEOF(buf); return res; } @@ -1715,7 +1762,9 @@ template inline bool tryParse(T & res, const char * data, size_t size) { ReadBufferFromMemory buf(data, size); - return tryReadText(res, buf); + if (!tryReadText(res, buf)) + return false; + return buf.eof(); } template @@ -1893,6 +1942,8 @@ struct PcgDeserializer } }; +void readParsedValueIntoString(String & s, ReadBuffer & buf, std::function parse_func); + template ReturnType readQuotedFieldInto(Vector & s, ReadBuffer & buf); diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp index 4b2e6580f9b..a7bc0d4845c 100644 --- a/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/src/IO/ReadWriteBufferFromHTTP.cpp @@ -238,7 +238,7 @@ ReadWriteBufferFromHTTP::ReadWriteBufferFromHTTP( if (iter == http_header_entries.end()) { - http_header_entries.emplace_back(user_agent, fmt::format("ClickHouse/{}", VERSION_STRING)); + http_header_entries.emplace_back(user_agent, fmt::format("ClickHouse/{}{}", VERSION_STRING, VERSION_OFFICIAL)); } if (!delay_initialization && use_external_buffer) diff --git a/src/IO/S3/Client.cpp b/src/IO/S3/Client.cpp index a966e370ca1..d4c41a3f2cd 100644 --- a/src/IO/S3/Client.cpp +++ b/src/IO/S3/Client.cpp @@ -46,7 +46,7 @@ namespace ProfileEvents namespace CurrentMetrics { - extern const Metric S3DiskNoKeyErrors; + extern const Metric DiskS3NoSuchKeyErrors; } namespace DB @@ -701,7 +701,7 @@ RequestResult Client::processRequestResult(RequestResult && outcome) const return std::forward(outcome); if (outcome.GetError().GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY) - CurrentMetrics::add(CurrentMetrics::S3DiskNoKeyErrors); + CurrentMetrics::add(CurrentMetrics::DiskS3NoSuchKeyErrors); String enriched_message = fmt::format( "{} {}", @@ -982,10 +982,10 @@ PocoHTTPClientConfiguration ClientFactory::createClientConfiguration( // NOLINT { auto context = Context::getGlobalContextInstance(); chassert(context); - auto proxy_configuration_resolver = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::protocolFromString(protocol), context->getConfigRef()); + auto proxy_configuration_resolver = ProxyConfigurationResolverProvider::get(ProxyConfiguration::protocolFromString(protocol), context->getConfigRef()); - auto per_request_configuration = [=] () { return proxy_configuration_resolver->resolve(); }; - auto error_report = [=] (const DB::ProxyConfiguration & req) { proxy_configuration_resolver->errorReport(req); }; + auto per_request_configuration = [=]{ return proxy_configuration_resolver->resolve(); }; + auto error_report = [=](const ProxyConfiguration & req) { proxy_configuration_resolver->errorReport(req); }; auto config = PocoHTTPClientConfiguration( per_request_configuration, diff --git a/src/IO/S3/Credentials.cpp b/src/IO/S3/Credentials.cpp index dfb7727fca4..d6f7542da6b 100644 --- a/src/IO/S3/Credentials.cpp +++ b/src/IO/S3/Credentials.cpp @@ -145,12 +145,16 @@ Aws::String AWSEC2MetadataClient::getDefaultCredentialsSecurely() const { String user_agent_string = awsComputeUserAgentString(); auto [new_token, response_code] = getEC2MetadataToken(user_agent_string); - if (response_code == Aws::Http::HttpResponseCode::BAD_REQUEST) + if (response_code == Aws::Http::HttpResponseCode::BAD_REQUEST + || response_code == Aws::Http::HttpResponseCode::REQUEST_NOT_MADE) + { + /// At least the host should be available and reply, otherwise neither IMDSv2 nor IMDSv1 are usable. return {}; + } else if (response_code != Aws::Http::HttpResponseCode::OK || new_token.empty()) { LOG_TRACE(logger, "Calling EC2MetadataService to get token failed, " - "falling back to less secure way. HTTP response code: {}", response_code); + "falling back to a less secure way. HTTP response code: {}", response_code); return getDefaultCredentials(); } @@ -247,7 +251,7 @@ static Aws::String getAWSMetadataEndpoint() return ec2_metadata_service_endpoint; } -std::shared_ptr InitEC2MetadataClient(const Aws::Client::ClientConfiguration & client_configuration) +std::shared_ptr createEC2MetadataClient(const Aws::Client::ClientConfiguration & client_configuration) { auto endpoint = getAWSMetadataEndpoint(); return std::make_shared(client_configuration, endpoint.c_str()); @@ -781,11 +785,13 @@ S3CredentialsProviderChain::S3CredentialsProviderChain( /// EC2MetadataService throttles by delaying the response so the service client should set a large read timeout. /// EC2MetadataService delay is in order of seconds so it only make sense to retry after a couple of seconds. - aws_client_configuration.connectTimeoutMs = 1000; + /// But the connection timeout should be small because there is the case when there is no IMDS at all, + /// like outside of the cloud, on your own machines. + aws_client_configuration.connectTimeoutMs = 10; aws_client_configuration.requestTimeoutMs = 1000; aws_client_configuration.retryStrategy = std::make_shared(1, 1000); - auto ec2_metadata_client = InitEC2MetadataClient(aws_client_configuration); + auto ec2_metadata_client = createEC2MetadataClient(aws_client_configuration); auto config_loader = std::make_shared(ec2_metadata_client, !credentials_configuration.use_insecure_imds_request); AddProvider(std::make_shared(config_loader)); diff --git a/src/IO/S3/Credentials.h b/src/IO/S3/Credentials.h index 95297ab0538..042c48ec15a 100644 --- a/src/IO/S3/Credentials.h +++ b/src/IO/S3/Credentials.h @@ -70,7 +70,7 @@ private: LoggerPtr logger; }; -std::shared_ptr InitEC2MetadataClient(const Aws::Client::ClientConfiguration & client_configuration); +std::shared_ptr createEC2MetadataClient(const Aws::Client::ClientConfiguration & client_configuration); class AWSEC2InstanceProfileConfigLoader : public Aws::Config::AWSProfileConfigLoader { diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index aab7a39534d..3e060e21c51 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -1,4 +1,5 @@ #include +#include #include "config.h" #if USE_AWS_S3 @@ -17,6 +18,7 @@ #include #include #include +#include #include #include @@ -29,6 +31,7 @@ #include + static const int SUCCESS_RESPONSE_MIN = 200; static const int SUCCESS_RESPONSE_MAX = 299; @@ -84,7 +87,7 @@ namespace DB::S3 { PocoHTTPClientConfiguration::PocoHTTPClientConfiguration( - std::function per_request_configuration_, + std::function per_request_configuration_, const String & force_region_, const RemoteHostFilter & remote_host_filter_, unsigned int s3_max_redirects_, @@ -94,7 +97,7 @@ PocoHTTPClientConfiguration::PocoHTTPClientConfiguration( bool s3_use_adaptive_timeouts_, const ThrottlerPtr & get_request_throttler_, const ThrottlerPtr & put_request_throttler_, - std::function error_report_) + std::function error_report_) : per_request_configuration(per_request_configuration_) , force_region(force_region_) , remote_host_filter(remote_host_filter_) @@ -107,6 +110,8 @@ PocoHTTPClientConfiguration::PocoHTTPClientConfiguration( , s3_use_adaptive_timeouts(s3_use_adaptive_timeouts_) , error_report(error_report_) { + /// This is used to identify configurations created by us. + userAgent = std::string(VERSION_FULL) + VERSION_OFFICIAL; } void PocoHTTPClientConfiguration::updateSchemeAndRegion() @@ -128,7 +133,7 @@ void PocoHTTPClientConfiguration::updateSchemeAndRegion() } else { - /// In global mode AWS C++ SDK send `us-east-1` but accept switching to another one if being suggested. + /// In global mode AWS C++ SDK sends `us-east-1` but accepts switching to another one if being suggested. region = Aws::Region::AWS_GLOBAL; } } @@ -166,6 +171,17 @@ PocoHTTPClient::PocoHTTPClient(const PocoHTTPClientConfiguration & client_config { } +PocoHTTPClient::PocoHTTPClient(const Aws::Client::ClientConfiguration & client_configuration) + : timeouts(ConnectionTimeouts() + .withConnectionTimeout(Poco::Timespan(client_configuration.connectTimeoutMs * 1000)) + .withSendTimeout(Poco::Timespan(client_configuration.requestTimeoutMs * 1000)) + .withReceiveTimeout(Poco::Timespan(client_configuration.requestTimeoutMs * 1000)) + .withTCPKeepAliveTimeout(Poco::Timespan( + client_configuration.enableTcpKeepAlive ? client_configuration.tcpKeepAliveIntervalMs * 1000 : 0))), + remote_host_filter(Context::getGlobalContextInstance()->getRemoteHostFilter()) +{ +} + std::shared_ptr PocoHTTPClient::MakeRequest( const std::shared_ptr & request, Aws::Utils::RateLimits::RateLimiterInterface * readLimiter, @@ -381,8 +397,11 @@ void PocoHTTPClient::makeRequestInternalImpl( try { - const auto proxy_configuration = per_request_configuration(); - for (unsigned int attempt = 0; attempt <= s3_max_redirects; ++attempt) + ProxyConfiguration proxy_configuration; + if (per_request_configuration) + proxy_configuration = per_request_configuration(); + + for (size_t attempt = 0; attempt <= s3_max_redirects; ++attempt) { Poco::URI target_uri(uri); @@ -500,7 +519,6 @@ void PocoHTTPClient::makeRequestInternalImpl( LOG_TEST(log, "Redirecting request to new location: {}", location); addMetric(request, S3MetricType::Redirects); - continue; } @@ -548,9 +566,9 @@ void PocoHTTPClient::makeRequestInternalImpl( } else { - if (status_code == 429 || status_code == 503) - { // API throttling + { + /// API throttling addMetric(request, S3MetricType::Throttling); } else if (status_code >= 300) diff --git a/src/IO/S3/PocoHTTPClient.h b/src/IO/S3/PocoHTTPClient.h index 88251b964e2..eb65460ce13 100644 --- a/src/IO/S3/PocoHTTPClient.h +++ b/src/IO/S3/PocoHTTPClient.h @@ -20,6 +20,7 @@ #include #include + namespace Aws::Http::Standard { class StandardHttpResponse; @@ -27,18 +28,20 @@ class StandardHttpResponse; namespace DB { - class Context; } + namespace DB::S3 { + class ClientFactory; class PocoHTTPClient; + struct PocoHTTPClientConfiguration : public Aws::Client::ClientConfiguration { - std::function per_request_configuration; + std::function per_request_configuration; String force_region; const RemoteHostFilter & remote_host_filter; unsigned int s3_max_redirects; @@ -54,13 +57,13 @@ struct PocoHTTPClientConfiguration : public Aws::Client::ClientConfiguration size_t http_keep_alive_timeout = DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT; size_t http_keep_alive_max_requests = DEFAULT_HTTP_KEEP_ALIVE_MAX_REQUEST; - std::function error_report; + std::function error_report; void updateSchemeAndRegion(); private: PocoHTTPClientConfiguration( - std::function per_request_configuration_, + std::function per_request_configuration_, const String & force_region_, const RemoteHostFilter & remote_host_filter_, unsigned int s3_max_redirects_, @@ -70,13 +73,13 @@ private: bool s3_use_adaptive_timeouts_, const ThrottlerPtr & get_request_throttler_, const ThrottlerPtr & put_request_throttler_, - std::function error_report_ - ); + std::function error_report_); /// Constructor of Aws::Client::ClientConfiguration must be called after AWS SDK initialization. friend ClientFactory; }; + class PocoHTTPResponse : public Aws::Http::Standard::StandardHttpResponse { public: @@ -116,10 +119,12 @@ private: Aws::Utils::Stream::ResponseStream body_stream; }; + class PocoHTTPClient : public Aws::Http::HttpClient { public: explicit PocoHTTPClient(const PocoHTTPClientConfiguration & client_configuration); + explicit PocoHTTPClient(const Aws::Client::ClientConfiguration & client_configuration); ~PocoHTTPClient() override = default; std::shared_ptr MakeRequest( @@ -166,14 +171,14 @@ protected: static S3MetricKind getMetricKind(const Aws::Http::HttpRequest & request); void addMetric(const Aws::Http::HttpRequest & request, S3MetricType type, ProfileEvents::Count amount = 1) const; - std::function per_request_configuration; - std::function error_report; + std::function per_request_configuration; + std::function error_report; ConnectionTimeouts timeouts; const RemoteHostFilter & remote_host_filter; - unsigned int s3_max_redirects; + unsigned int s3_max_redirects = 0; bool s3_use_adaptive_timeouts = true; - bool enable_s3_requests_logging; - bool for_disk_s3; + bool enable_s3_requests_logging = false; + bool for_disk_s3 = false; /// Limits get request per second rate for GET, SELECT and all other requests, excluding throttled by put throttler /// (i.e. throttles GetObject, HeadObject) diff --git a/src/IO/S3/PocoHTTPClientFactory.cpp b/src/IO/S3/PocoHTTPClientFactory.cpp index ef7af2d01ba..abec907778c 100644 --- a/src/IO/S3/PocoHTTPClientFactory.cpp +++ b/src/IO/S3/PocoHTTPClientFactory.cpp @@ -15,7 +15,10 @@ namespace DB::S3 std::shared_ptr PocoHTTPClientFactory::CreateHttpClient(const Aws::Client::ClientConfiguration & client_configuration) const { - return std::make_shared(static_cast(client_configuration)); + if (client_configuration.userAgent.starts_with("ClickHouse")) + return std::make_shared(static_cast(client_configuration)); + else /// This client is created inside the AWS SDK with default settings to obtain ECS credentials from localhost. + return std::make_shared(client_configuration); } std::shared_ptr PocoHTTPClientFactory::CreateHttpRequest( diff --git a/src/IO/S3/URI.cpp b/src/IO/S3/URI.cpp index fead18315d8..9c80b377661 100644 --- a/src/IO/S3/URI.cpp +++ b/src/IO/S3/URI.cpp @@ -1,8 +1,8 @@ #include -#include -#include -#include "Common/Macros.h" + #if USE_AWS_S3 +#include +#include #include #include #include @@ -10,6 +10,7 @@ #include + namespace DB { @@ -40,21 +41,13 @@ URI::URI(const std::string & uri_, bool allow_archive_path_syntax) /// Case when AWS Private Link Interface is being used /// E.g. (bucket.vpce-07a1cd78f1bd55c5f-j3a3vg6w.s3.us-east-1.vpce.amazonaws.com/bucket-name/key) /// https://docs.aws.amazon.com/AmazonS3/latest/userguide/privatelink-interface-endpoints.html - static const RE2 aws_private_link_style_pattern(R"(bucket\.vpce\-([a-z0-9\-.]+)\.vpce.amazonaws.com(:\d{1,5})?)"); + static const RE2 aws_private_link_style_pattern(R"(bucket\.vpce\-([a-z0-9\-.]+)\.vpce\.amazonaws\.com(:\d{1,5})?)"); - /// Case when bucket name and key represented in path of S3 URL. + /// Case when bucket name and key represented in the path of S3 URL. /// E.g. (https://s3.region.amazonaws.com/bucket-name/key) /// https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html#path-style-access static const RE2 path_style_pattern("^/([^/]*)/(.*)"); - static constexpr auto S3 = "S3"; - static constexpr auto S3EXPRESS = "S3EXPRESS"; - static constexpr auto COSN = "COSN"; - static constexpr auto COS = "COS"; - static constexpr auto OBS = "OBS"; - static constexpr auto OSS = "OSS"; - static constexpr auto EOS = "EOS"; - if (allow_archive_path_syntax) std::tie(uri_str, archive_pattern) = getURIAndArchivePattern(uri_); else @@ -85,7 +78,7 @@ URI::URI(const std::string & uri_, bool allow_archive_path_syntax) URIConverter::modifyURI(uri, mapper); } - storage_name = S3; + storage_name = "S3"; if (uri.getHost().empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Host is empty in S3 URI."); @@ -93,11 +86,13 @@ URI::URI(const std::string & uri_, bool allow_archive_path_syntax) /// Extract object version ID from query string. bool has_version_id = false; for (const auto & [query_key, query_value] : uri.getQueryParameters()) + { if (query_key == "versionId") { version_id = query_value; has_version_id = true; } + } /// Poco::URI will ignore '?' when parsing the path, but if there is a versionId in the http parameter, /// '?' can not be used as a wildcard, otherwise it will be ambiguous. @@ -129,15 +124,8 @@ URI::URI(const std::string & uri_, bool allow_archive_path_syntax) } boost::to_upper(name); - /// For S3Express it will look like s3express-eun1-az1, i.e. contain region and AZ info - if (name != S3 && !name.starts_with(S3EXPRESS) && name != COS && name != OBS && name != OSS && name != EOS) - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Object storage system name is unrecognized in virtual hosted style S3 URI: {}", - quoteString(name)); - - if (name == COS) - storage_name = COSN; + if (name == "COS") + storage_name = "COSN"; else storage_name = name; } @@ -148,13 +136,22 @@ URI::URI(const std::string & uri_, bool allow_archive_path_syntax) validateBucket(bucket, uri); } else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bucket or key name are invalid in S3 URI."); + { + /// Custom endpoint, e.g. a public domain of Cloudflare R2, + /// which could be served by a custom server-side code. + storage_name = "S3"; + bucket = "default"; + is_virtual_hosted_style = false; + endpoint = uri.getScheme() + "://" + uri.getAuthority(); + if (!uri.getPath().empty()) + key = uri.getPath().substr(1); + } } void URI::addRegionToURI(const std::string ®ion) { - if (auto pos = endpoint.find("amazonaws.com"); pos != std::string::npos) - endpoint = endpoint.substr(0, pos) + region + "." + endpoint.substr(pos); + if (auto pos = endpoint.find(".amazonaws.com"); pos != std::string::npos) + endpoint = endpoint.substr(0, pos) + "." + region + endpoint.substr(pos); } void URI::validateBucket(const String & bucket, const Poco::URI & uri) diff --git a/src/IO/S3/URI.h b/src/IO/S3/URI.h index 80e2da96cd4..c8d0b28cd15 100644 --- a/src/IO/S3/URI.h +++ b/src/IO/S3/URI.h @@ -1,14 +1,14 @@ #pragma once -#include -#include - #include "config.h" #if USE_AWS_S3 +#include +#include #include + namespace DB::S3 { @@ -23,7 +23,7 @@ namespace DB::S3 struct URI { Poco::URI uri; - // Custom endpoint if URI scheme is not S3. + // Custom endpoint if URI scheme, if not S3. std::string endpoint; std::string bucket; std::string key; diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index a794cdbcf05..59040bf1fea 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -294,21 +294,21 @@ void RequestSettings::finishInit(const DB::Settings & settings, bool validate_se /// to avoid losing token bucket state on every config reload, /// which could lead to exceeding limit for short time. /// But it is good enough unless very high `burst` values are used. - if (UInt64 max_get_rps = isChanged("max_get_rps") ? get("max_get_rps").get() : settings.s3_max_get_rps) + if (UInt64 max_get_rps = isChanged("max_get_rps") ? get("max_get_rps").safeGet() : settings.s3_max_get_rps) { size_t default_max_get_burst = settings.s3_max_get_burst ? settings.s3_max_get_burst : (Throttler::default_burst_seconds * max_get_rps); - size_t max_get_burst = isChanged("max_get_burts") ? get("max_get_burst").get() : default_max_get_burst; + size_t max_get_burst = isChanged("max_get_burts") ? get("max_get_burst").safeGet() : default_max_get_burst; get_request_throttler = std::make_shared(max_get_rps, max_get_burst); } - if (UInt64 max_put_rps = isChanged("max_put_rps") ? get("max_put_rps").get() : settings.s3_max_put_rps) + if (UInt64 max_put_rps = isChanged("max_put_rps") ? get("max_put_rps").safeGet() : settings.s3_max_put_rps) { size_t default_max_put_burst = settings.s3_max_put_burst ? settings.s3_max_put_burst : (Throttler::default_burst_seconds * max_put_rps); - size_t max_put_burst = isChanged("max_put_burts") ? get("max_put_burst").get() : default_max_put_burst; + size_t max_put_burst = isChanged("max_put_burts") ? get("max_put_burst").safeGet() : default_max_put_burst; put_request_throttler = std::make_shared(max_put_rps, max_put_burst); } } diff --git a/src/IO/SnappyWriteBuffer.cpp b/src/IO/SnappyWriteBuffer.cpp index ca40d0656d1..0e02b48e1e0 100644 --- a/src/IO/SnappyWriteBuffer.cpp +++ b/src/IO/SnappyWriteBuffer.cpp @@ -16,7 +16,13 @@ namespace ErrorCodes } SnappyWriteBuffer::SnappyWriteBuffer(std::unique_ptr out_, size_t buf_size, char * existing_memory, size_t alignment) - : BufferWithOwnMemory(buf_size, existing_memory, alignment), out(std::move(out_)) + : SnappyWriteBuffer(*out_, buf_size, existing_memory, alignment) +{ + out_holder = std::move(out_); +} + +SnappyWriteBuffer::SnappyWriteBuffer(WriteBuffer & out_, size_t buf_size, char * existing_memory, size_t alignment) + : BufferWithOwnMemory(buf_size, existing_memory, alignment), out(&out_) { } diff --git a/src/IO/SnappyWriteBuffer.h b/src/IO/SnappyWriteBuffer.h index 2ff86fb64ef..b7a084d0f80 100644 --- a/src/IO/SnappyWriteBuffer.h +++ b/src/IO/SnappyWriteBuffer.h @@ -18,6 +18,12 @@ public: char * existing_memory = nullptr, size_t alignment = 0); + explicit SnappyWriteBuffer( + WriteBuffer & out_, + size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, + char * existing_memory = nullptr, + size_t alignment = 0); + ~SnappyWriteBuffer() override; void finalizeImpl() override { finish(); } @@ -28,7 +34,9 @@ private: void finishImpl(); void finish(); - std::unique_ptr out; + WriteBuffer * out; + std::unique_ptr out_holder; + bool finished = false; String uncompress_buffer; diff --git a/src/IO/WriteBuffer.h b/src/IO/WriteBuffer.h index 4759f96a235..84b1079b824 100644 --- a/src/IO/WriteBuffer.h +++ b/src/IO/WriteBuffer.h @@ -64,7 +64,8 @@ public: } bytes += bytes_in_buffer; - pos = working_buffer.begin(); + pos = working_buffer.begin() + nextimpl_working_buffer_offset; + nextimpl_working_buffer_offset = 0; } /// Calling finalize() in the destructor of derived classes is a bad practice. @@ -164,6 +165,11 @@ protected: bool finalized = false; bool canceled = false; + /// The number of bytes to preserve from the initial position of `working_buffer` + /// buffer. Apparently this is an additional out-parameter for nextImpl(), + /// not a real field. + size_t nextimpl_working_buffer_offset = 0; + private: /** Write the data in the buffer (from the beginning of the buffer to the current position). * Throw an exception if something is wrong. diff --git a/src/IO/WriteBufferFromPocoSocket.cpp b/src/IO/WriteBufferFromPocoSocket.cpp index 5ed4dbdc787..ffb38a384a0 100644 --- a/src/IO/WriteBufferFromPocoSocket.cpp +++ b/src/IO/WriteBufferFromPocoSocket.cpp @@ -183,6 +183,7 @@ WriteBufferFromPocoSocket::WriteBufferFromPocoSocket(Poco::Net::Socket & socket_ , socket(socket_) , peer_address(socket.peerAddress()) , our_address(socket.address()) + , write_event(ProfileEvents::end()) , socket_description("socket (" + peer_address.toString() + ")") { } diff --git a/src/IO/WriteBufferFromPocoSocketChunked.cpp b/src/IO/WriteBufferFromPocoSocketChunked.cpp new file mode 100644 index 00000000000..9da46ee2d10 --- /dev/null +++ b/src/IO/WriteBufferFromPocoSocketChunked.cpp @@ -0,0 +1,210 @@ +#include +#include +#include + + +namespace +{ + +template +void setValue(T * typed_ptr, std::type_identity_t val) +{ + memcpy(static_cast(typed_ptr), &val, sizeof(T)); +} + +} + +namespace DB +{ + +WriteBufferFromPocoSocketChunked::WriteBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, size_t buf_size) + : WriteBufferFromPocoSocketChunked(socket_, ProfileEvents::end(), buf_size) +{} + +WriteBufferFromPocoSocketChunked::WriteBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, const ProfileEvents::Event & write_event_, size_t buf_size) + : WriteBufferFromPocoSocket( + socket_, write_event_, + std::clamp(buf_size, sizeof(*chunk_size_ptr) + 1, static_cast(std::numeric_limits>::max()))), + log(getLogger("Protocol")) +{} + +void WriteBufferFromPocoSocketChunked::enableChunked() +{ + chunked = true; + /// Initialize next chunk + chunk_size_ptr = reinterpret_cast(pos); + pos += std::min(available(), sizeof(*chunk_size_ptr)); + /// Pretend finishChunk() was just called to prevent sending empty chunk if finishChunk() called immediately + last_finish_chunk = chunk_size_ptr; +} + +void WriteBufferFromPocoSocketChunked::finishChunk() +{ + if (!chunked) + return; + + if (pos <= reinterpret_cast(chunk_size_ptr) + sizeof(*chunk_size_ptr)) + { + /// Prevent duplicate finish chunk (and finish chunk right after enableChunked()) + if (chunk_size_ptr == last_finish_chunk) + return; + + /// If current chunk is empty it means we are finishing a chunk previously sent by next(), + /// we want to convert current chunk header into end-of-chunk marker and initialize next chunk. + /// We don't need to worry about if it's the end of the buffer because next() always sends the whole buffer + /// so it should be a beginning of the buffer. + + chassert(reinterpret_cast(chunk_size_ptr) == working_buffer.begin()); + + setValue(chunk_size_ptr, 0); + /// Initialize next chunk + chunk_size_ptr = reinterpret_cast(pos); + pos += std::min(available(), sizeof(*chunk_size_ptr)); + + last_finish_chunk = chunk_size_ptr; + + return; + } + + /// Previously finished chunk wasn't sent yet + if (last_finish_chunk == chunk_size_ptr) + { + chunk_started = false; + LOG_TEST(log, "{} -> {} Chunk send ended.", ourAddress().toString(), peerAddress().toString()); + } + + /// Fill up current chunk size + setValue(chunk_size_ptr, toLittleEndian(static_cast(pos - reinterpret_cast(chunk_size_ptr) - sizeof(*chunk_size_ptr)))); + + if (!chunk_started) + LOG_TEST(log, "{} -> {} Chunk send started. Message {}, size {}", + ourAddress().toString(), peerAddress().toString(), + static_cast(*(reinterpret_cast(chunk_size_ptr) + sizeof(*chunk_size_ptr))), + *chunk_size_ptr); + else + { + chunk_started = false; + LOG_TEST(log, "{} -> {} Chunk send continued. Size {}", ourAddress().toString(), peerAddress().toString(), *chunk_size_ptr); + } + + LOG_TEST(log, "{} -> {} Chunk send ended.", ourAddress().toString(), peerAddress().toString()); + + if (available() < sizeof(*chunk_size_ptr)) + { + finishing = available(); + pos += available(); + chunk_size_ptr = reinterpret_cast(pos); + last_finish_chunk = chunk_size_ptr; + return; + } + + /// Buffer end-of-chunk + setValue(reinterpret_cast(pos), 0); + pos += sizeof(*chunk_size_ptr); + /// Initialize next chunk + chunk_size_ptr = reinterpret_cast(pos); + pos += std::min(available(), sizeof(*chunk_size_ptr)); + + last_finish_chunk = chunk_size_ptr; +} + +WriteBufferFromPocoSocketChunked::~WriteBufferFromPocoSocketChunked() +{ + try + { + finalize(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } +} + +void WriteBufferFromPocoSocketChunked::nextImpl() +{ + if (!chunked) + { + WriteBufferFromPocoSocket::nextImpl(); + return; + } + + /// next() after finishChunk at the end of the buffer + if (finishing < sizeof(*chunk_size_ptr)) + { + pos -= finishing; + /// Send current chunk + WriteBufferFromPocoSocket::nextImpl(); + /// Send end-of-chunk directly + UInt32 s = 0; + socketSendBytes(reinterpret_cast(&s), sizeof(s)); + + finishing = sizeof(*chunk_size_ptr); + + /// Initialize next chunk + chunk_size_ptr = reinterpret_cast(working_buffer.begin()); + nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); + + last_finish_chunk = chunk_size_ptr; + + return; + } + + /// Prevent sending empty chunk + if (offset() == sizeof(*chunk_size_ptr)) + { + nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); + return; + } + + /// Finish chunk at the end of the buffer + if (working_buffer.end() - reinterpret_cast(chunk_size_ptr) <= static_cast(sizeof(*chunk_size_ptr))) + { + pos = reinterpret_cast(chunk_size_ptr); + /// Send current chunk + WriteBufferFromPocoSocket::nextImpl(); + /// Initialize next chunk + chunk_size_ptr = reinterpret_cast(working_buffer.begin()); + nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); + + last_finish_chunk = nullptr; + + return; + } + + bool initialize_last_finish_chunk = false; + if (pos - reinterpret_cast(chunk_size_ptr) == sizeof(*chunk_size_ptr)) // next() after finishChunk + { + pos -= sizeof(*chunk_size_ptr); + initialize_last_finish_chunk = true; + } + else // fill up current chunk size + { + setValue(chunk_size_ptr, toLittleEndian(static_cast(pos - reinterpret_cast(chunk_size_ptr) - sizeof(*chunk_size_ptr)))); + if (!chunk_started) + { + chunk_started = true; + LOG_TEST(log, "{} -> {} Chunk send started. Message {}, size {}", + ourAddress().toString(), peerAddress().toString(), + static_cast(*(reinterpret_cast(chunk_size_ptr) + sizeof(*chunk_size_ptr))), + *chunk_size_ptr); + } + else + LOG_TEST(log, "{} -> {} Chunk send continued. Size {}", ourAddress().toString(), peerAddress().toString(), *chunk_size_ptr); + } + /// Send current chunk + WriteBufferFromPocoSocket::nextImpl(); + /// Initialize next chunk + chunk_size_ptr = reinterpret_cast(working_buffer.begin()); + nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); + + last_finish_chunk = initialize_last_finish_chunk ? chunk_size_ptr : nullptr; +} + +void WriteBufferFromPocoSocketChunked::finalizeImpl() +{ + if (chunked && offset() == sizeof(*chunk_size_ptr)) + pos -= sizeof(*chunk_size_ptr); + WriteBufferFromPocoSocket::finalizeImpl(); +} + +} diff --git a/src/IO/WriteBufferFromPocoSocketChunked.h b/src/IO/WriteBufferFromPocoSocketChunked.h new file mode 100644 index 00000000000..13a277e3bfb --- /dev/null +++ b/src/IO/WriteBufferFromPocoSocketChunked.h @@ -0,0 +1,36 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ + +class WriteBufferFromPocoSocketChunked: public WriteBufferFromPocoSocket +{ +public: + explicit WriteBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE); + explicit WriteBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, const ProfileEvents::Event & write_event_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE); + + void enableChunked(); + void finishChunk(); + ~WriteBufferFromPocoSocketChunked() override; + +protected: + void nextImpl() override; + void finalizeImpl() override; + Poco::Net::SocketAddress peerAddress() const { return peer_address; } + Poco::Net::SocketAddress ourAddress() const { return our_address; } + +private: + LoggerPtr log; + bool chunked = false; + UInt32 * last_finish_chunk = nullptr; // pointer to the last chunk header created by finishChunk + bool chunk_started = false; // chunk started flag + UInt32 * chunk_size_ptr = nullptr; // pointer to the chunk size holder in the buffer + size_t finishing = sizeof(*chunk_size_ptr); // indicates not enough buffer for end-of-chunk marker +}; + +} diff --git a/src/IO/parseDateTimeBestEffort.cpp b/src/IO/parseDateTimeBestEffort.cpp index e046e837689..52bcdc6bbb4 100644 --- a/src/IO/parseDateTimeBestEffort.cpp +++ b/src/IO/parseDateTimeBestEffort.cpp @@ -82,13 +82,14 @@ struct DateTimeSubsecondPart UInt8 digits; }; -template +template ReturnType parseDateTimeBestEffortImpl( time_t & res, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone, - DateTimeSubsecondPart * fractional) + DateTimeSubsecondPart * fractional, + const char * allowed_date_delimiters = nullptr) { auto on_error = [&](int error_code [[maybe_unused]], FormatStringHelper fmt_string [[maybe_unused]], @@ -170,22 +171,36 @@ ReturnType parseDateTimeBestEffortImpl( fractional->digits = 3; readDecimalNumber<3>(fractional->value, digits + 10); } + else if constexpr (strict) + { + /// Fractional part is not allowed. + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: unexpected fractional part"); + } return ReturnType(true); } else if (num_digits == 10 && !year && !has_time) { + if (strict) + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Strict best effort parsing doesn't allow timestamps"); + /// This is unix timestamp. readDecimalNumber<10>(res, digits); return ReturnType(true); } else if (num_digits == 9 && !year && !has_time) { + if (strict) + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Strict best effort parsing doesn't allow timestamps"); + /// This is unix timestamp. readDecimalNumber<9>(res, digits); return ReturnType(true); } else if (num_digits == 14 && !year && !has_time) { + if (strict) + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Strict best effort parsing doesn't allow date times without separators"); + /// This is YYYYMMDDhhmmss readDecimalNumber<4>(year, digits); readDecimalNumber<2>(month, digits + 4); @@ -197,6 +212,9 @@ ReturnType parseDateTimeBestEffortImpl( } else if (num_digits == 8 && !year) { + if (strict) + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Strict best effort parsing doesn't allow date times without separators"); + /// This is YYYYMMDD readDecimalNumber<4>(year, digits); readDecimalNumber<2>(month, digits + 4); @@ -204,6 +222,9 @@ ReturnType parseDateTimeBestEffortImpl( } else if (num_digits == 6) { + if (strict) + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Strict best effort parsing doesn't allow date times without separators"); + /// This is YYYYMM or hhmmss if (!year && !month) { @@ -272,6 +293,9 @@ ReturnType parseDateTimeBestEffortImpl( else return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: unexpected number of decimal digits after year and month: {}", num_digits); } + + if (!isSymbolIn(delimiter_after_year, allowed_date_delimiters)) + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: '{}' delimiter between date parts is not allowed", delimiter_after_year); } } else if (num_digits == 2 || num_digits == 1) @@ -329,7 +353,7 @@ ReturnType parseDateTimeBestEffortImpl( if (month && !day_of_month) day_of_month = hour_or_day_of_month_or_month; } - else if (checkChar('/', in) || checkChar('.', in) || checkChar('-', in)) + else if ((!in.eof() && isSymbolIn(*in.position(), allowed_date_delimiters)) && (checkChar('/', in) || checkChar('.', in) || checkChar('-', in))) { if (day_of_month) return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: day of month is duplicated"); @@ -378,7 +402,7 @@ ReturnType parseDateTimeBestEffortImpl( if (month > 12) std::swap(month, day_of_month); - if (checkChar('/', in) || checkChar('.', in) || checkChar('-', in)) + if ((!in.eof() && isSymbolIn(*in.position(), allowed_date_delimiters)) && (checkChar('/', in) || checkChar('.', in) || checkChar('-', in))) { if (year) return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: year component is duplicated"); @@ -403,9 +427,16 @@ ReturnType parseDateTimeBestEffortImpl( else { if (day_of_month) + { + if (strict && hour) + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: hour component is duplicated"); + hour = hour_or_day_of_month_or_month; + } else + { day_of_month = hour_or_day_of_month_or_month; + } } } else if (num_digits != 0) @@ -446,6 +477,11 @@ ReturnType parseDateTimeBestEffortImpl( fractional->digits = num_digits; readDecimalNumber(fractional->value, num_digits, digits); } + else if (strict) + { + /// Fractional part is not allowed. + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: unexpected fractional part"); + } } else if (c == '+' || c == '-') { @@ -582,12 +618,24 @@ ReturnType parseDateTimeBestEffortImpl( return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: neither Date nor Time was parsed successfully"); if (!day_of_month) + { + if constexpr (strict) + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: day of month is required"); day_of_month = 1; + } + if (!month) + { + if constexpr (strict) + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: month is required"); month = 1; + } if (!year) { + if constexpr (strict) + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: year is required"); + /// If year is not specified, it will be the current year if the date is unknown or not greater than today, /// otherwise it will be the previous year. /// This convoluted logic is needed to parse the syslog format, which looks as follows: "Mar 3 01:33:48". @@ -641,6 +689,20 @@ ReturnType parseDateTimeBestEffortImpl( } }; + if constexpr (strict) + { + if constexpr (is_64) + { + if (year < 1900) + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime64: year {} is less than minimum supported year 1900", year); + } + else + { + if (year < 1970) + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: year {} is less than minimum supported year 1970", year); + } + } + if (has_time_zone_offset) { res = utc_time_zone.makeDateTime(year, month, day_of_month, hour, minute, second); @@ -654,20 +716,20 @@ ReturnType parseDateTimeBestEffortImpl( return ReturnType(true); } -template -ReturnType parseDateTime64BestEffortImpl(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone) +template +ReturnType parseDateTime64BestEffortImpl(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone, const char * allowed_date_delimiters = nullptr) { time_t whole; DateTimeSubsecondPart subsecond = {0, 0}; // needs to be explicitly initialized sine it could be missing from input string if constexpr (std::is_same_v) { - if (!parseDateTimeBestEffortImpl(whole, in, local_time_zone, utc_time_zone, &subsecond)) + if (!parseDateTimeBestEffortImpl(whole, in, local_time_zone, utc_time_zone, &subsecond, allowed_date_delimiters)) return false; } else { - parseDateTimeBestEffortImpl(whole, in, local_time_zone, utc_time_zone, &subsecond); + parseDateTimeBestEffortImpl(whole, in, local_time_zone, utc_time_zone, &subsecond, allowed_date_delimiters); } @@ -730,4 +792,24 @@ bool tryParseDateTime64BestEffortUS(DateTime64 & res, UInt32 scale, ReadBuffer & return parseDateTime64BestEffortImpl(res, scale, in, local_time_zone, utc_time_zone); } +bool tryParseDateTimeBestEffortStrict(time_t & res, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone, const char * allowed_date_delimiters) +{ + return parseDateTimeBestEffortImpl(res, in, local_time_zone, utc_time_zone, nullptr, allowed_date_delimiters); +} + +bool tryParseDateTimeBestEffortUSStrict(time_t & res, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone, const char * allowed_date_delimiters) +{ + return parseDateTimeBestEffortImpl(res, in, local_time_zone, utc_time_zone, nullptr, allowed_date_delimiters); +} + +bool tryParseDateTime64BestEffortStrict(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone, const char * allowed_date_delimiters) +{ + return parseDateTime64BestEffortImpl(res, scale, in, local_time_zone, utc_time_zone, allowed_date_delimiters); +} + +bool tryParseDateTime64BestEffortUSStrict(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone, const char * allowed_date_delimiters) +{ + return parseDateTime64BestEffortImpl(res, scale, in, local_time_zone, utc_time_zone, allowed_date_delimiters); +} + } diff --git a/src/IO/parseDateTimeBestEffort.h b/src/IO/parseDateTimeBestEffort.h index 22af44f9e76..6dd052b67a3 100644 --- a/src/IO/parseDateTimeBestEffort.h +++ b/src/IO/parseDateTimeBestEffort.h @@ -63,4 +63,12 @@ void parseDateTime64BestEffort(DateTime64 & res, UInt32 scale, ReadBuffer & in, bool tryParseDateTime64BestEffort(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone); void parseDateTime64BestEffortUS(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone); bool tryParseDateTime64BestEffortUS(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone); + +/// More strict version of best effort parsing. Requires day, month and year to be present, checks for allowed +/// delimiters between date components, makes additional correctness checks. Used in schema inference if date times. +bool tryParseDateTimeBestEffortStrict(time_t & res, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone, const char * allowed_date_delimiters); +bool tryParseDateTimeBestEffortUSStrict(time_t & res, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone, const char * allowed_date_delimiters); +bool tryParseDateTime64BestEffortStrict(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone, const char * allowed_date_delimiters); +bool tryParseDateTime64BestEffortUSStrict(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone, const char * allowed_date_delimiters); + } diff --git a/src/IO/tests/gtest_s3_uri.cpp b/src/IO/tests/gtest_s3_uri.cpp index 0ec28f80072..c0bf7fcb28a 100644 --- a/src/IO/tests/gtest_s3_uri.cpp +++ b/src/IO/tests/gtest_s3_uri.cpp @@ -206,11 +206,6 @@ TEST(S3UriTest, validPatterns) } } -TEST_P(S3UriTest, invalidPatterns) -{ - ASSERT_ANY_THROW(S3::URI new_uri(GetParam())); -} - TEST(S3UriTest, versionIdChecks) { for (const auto& test_case : TestCases) @@ -223,19 +218,5 @@ TEST(S3UriTest, versionIdChecks) } } -INSTANTIATE_TEST_SUITE_P( - S3, - S3UriTest, - testing::Values( - "https:///", - "https://.s3.amazonaws.com/key", - "https://s3.amazonaws.com/key", - "https://jokserfn.s3amazonaws.com/key", - "https://s3.amazonaws.com//", - "https://amazonaws.com/", - "https://amazonaws.com//", - "https://amazonaws.com//key")); - } - #endif diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index df1c0aa1f2a..2a594839c6a 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -301,11 +301,11 @@ const ActionsDAG::Node & ActionsDAG::addCast(const Node & node_to_cast, const Da column.column = DataTypeString().createColumnConst(0, cast_type_constant_value); column.type = std::make_shared(); - const auto * cast_type_constant_node = &addColumn(std::move(column)); + const auto * cast_type_constant_node = &addColumn(column); ActionsDAG::NodeRawConstPtrs children = {&node_to_cast, cast_type_constant_node}; - FunctionOverloadResolverPtr func_builder_cast = createInternalCastOverloadResolver(CastType::nonAccurate, {}); + auto func_base_cast = createInternalCast(ColumnWithTypeAndName{node_to_cast.result_type, node_to_cast.result_name}, cast_type, CastType::nonAccurate, {}); - return addFunction(func_builder_cast, std::move(children), result_name); + return addFunction(func_base_cast, std::move(children), result_name); } const ActionsDAG::Node & ActionsDAG::addFunctionImpl( @@ -1547,11 +1547,11 @@ ActionsDAG ActionsDAG::makeConvertingActions( const auto * left_arg = dst_node; CastDiagnostic diagnostic = {dst_node->result_name, res_elem.name}; - FunctionOverloadResolverPtr func_builder_cast - = createInternalCastOverloadResolver(CastType::nonAccurate, std::move(diagnostic)); + ColumnWithTypeAndName left_column{nullptr, dst_node->result_type, {}}; + auto func_base_cast = createInternalCast(std::move(left_column), res_elem.type, CastType::nonAccurate, std::move(diagnostic)); NodeRawConstPtrs children = { left_arg, right_arg }; - dst_node = &actions_dag.addFunction(func_builder_cast, std::move(children), {}); + dst_node = &actions_dag.addFunction(func_base_cast, std::move(children), {}); } if (dst_node->column && isColumnConst(*dst_node->column) && !(res_elem.column && isColumnConst(*res_elem.column))) diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index e1b7e92ee5d..368eb8174f0 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -131,7 +131,7 @@ static Block createBlockFromCollection(const Collection & collection, const Data throw Exception(ErrorCodes::INCORRECT_ELEMENT_OF_SET, "Invalid type in set. Expected tuple, got {}", String(value.getTypeName())); - const auto & tuple = value.template get(); + const auto & tuple = value.template safeGet(); size_t tuple_size = tuple.size(); if (tuple_size != columns_num) throw Exception(ErrorCodes::INCORRECT_ELEMENT_OF_SET, "Incorrect size of tuple in set: {} instead of {}", @@ -233,7 +233,7 @@ static Block createBlockFromAST(const ASTPtr & node, const DataTypes & types, Co "Invalid type of set. Expected tuple, got {}", function_result.getTypeName()); - tuple = &function_result.get(); + tuple = &function_result.safeGet(); } /// Tuple can be represented as a literal in AST. @@ -246,7 +246,7 @@ static Block createBlockFromAST(const ASTPtr & node, const DataTypes & types, Co "Invalid type in set. Expected tuple, got {}", literal->value.getTypeName()); - tuple = &literal->value.get(); + tuple = &literal->value.safeGet(); } assert(tuple || func); @@ -332,14 +332,14 @@ Block createBlockForSet( if (type_index == TypeIndex::Tuple) { const DataTypes & value_types = assert_cast(right_arg_type.get())->getElements(); - block = createBlockFromCollection(right_arg_value.get(), value_types, set_element_types, tranform_null_in); + block = createBlockFromCollection(right_arg_value.safeGet(), value_types, set_element_types, tranform_null_in); } else if (type_index == TypeIndex::Array) { const auto* right_arg_array_type = assert_cast(right_arg_type.get()); - size_t right_arg_array_size = right_arg_value.get().size(); + size_t right_arg_array_size = right_arg_value.safeGet().size(); DataTypes value_types(right_arg_array_size, right_arg_array_type->getNestedType()); - block = createBlockFromCollection(right_arg_value.get(), value_types, set_element_types, tranform_null_in); + block = createBlockFromCollection(right_arg_value.safeGet(), value_types, set_element_types, tranform_null_in); } else throw_unsupported_type(right_arg_type); diff --git a/src/Interpreters/AddDefaultDatabaseVisitor.h b/src/Interpreters/AddDefaultDatabaseVisitor.h index 356bffa75e9..d59fd35df77 100644 --- a/src/Interpreters/AddDefaultDatabaseVisitor.h +++ b/src/Interpreters/AddDefaultDatabaseVisitor.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -100,6 +101,7 @@ private: const String database_name; std::set external_tables; + mutable std::unordered_set with_aliases; bool only_replace_current_database_function = false; bool only_replace_in_join = false; @@ -117,6 +119,10 @@ private: void visit(ASTSelectQuery & select, ASTPtr &) const { + if (select.recursive_with) + for (const auto & child : select.with()->children) + with_aliases.insert(child->as()->name); + if (select.tables()) tryVisit(select.refTables()); @@ -165,6 +171,9 @@ private: /// There is temporary table with such name, should not be rewritten. if (external_tables.contains(identifier.shortName())) return; + /// This is WITH RECURSIVE alias. + if (with_aliases.contains(identifier.name())) + return; auto qualified_identifier = std::make_shared(database_name, identifier.name()); if (!identifier.alias.empty()) @@ -201,7 +210,7 @@ private: if (literal_value.getType() != Field::Types::String) continue; - auto dictionary_name = literal_value.get(); + auto dictionary_name = literal_value.safeGet(); auto qualified_dictionary_name = context->getExternalDictionariesLoader().qualifyDictionaryNameWithDatabase(dictionary_name, context); literal_value = qualified_dictionary_name.getFullName(); } diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 13c70b38543..60db406ca72 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -11,11 +11,15 @@ #include #include #include +#include +#include #include #include #include +#include #include +#include namespace fs = std::filesystem; @@ -88,6 +92,7 @@ FileCache::FileCache(const std::string & cache_name, const FileCacheSettings & s , bypass_cache_threshold(settings.enable_bypass_cache_with_threshold ? settings.bypass_cache_threshold : 0) , boundary_alignment(settings.boundary_alignment) , load_metadata_threads(settings.load_metadata_threads) + , load_metadata_asynchronously(settings.load_metadata_asynchronously) , write_cache_per_user_directory(settings.write_cache_per_user_id_directory) , keep_current_size_to_max_ratio(1 - settings.keep_free_space_size_ratio) , keep_current_elements_to_max_ratio(1 - settings.keep_free_space_elements_ratio) @@ -136,7 +141,17 @@ const FileCache::UserInfo & FileCache::getInternalUser() bool FileCache::isInitialized() const { - return is_initialized.load(std::memory_order_seq_cst); + return is_initialized; +} + +void FileCache::throwInitExceptionIfNeeded() +{ + if (load_metadata_asynchronously) + return; + + std::lock_guard lock(init_mutex); + if (init_exception) + std::rethrow_exception(init_exception); } const String & FileCache::getBasePath() const @@ -170,6 +185,35 @@ void FileCache::assertInitialized() const } void FileCache::initialize() +{ + // Prevent initialize() from running twice. This may be caused by two cache disks being created with the same path (see integration/test_filesystem_cache). + callOnce(initialize_called, [&] { + bool need_to_load_metadata = fs::exists(getBasePath()); + try + { + if (!need_to_load_metadata) + fs::create_directories(getBasePath()); + status_file = make_unique(fs::path(getBasePath()) / "status", StatusFile::write_full_info); + } + catch (...) + { + init_exception = std::current_exception(); + tryLogCurrentException(__PRETTY_FUNCTION__); + throw; + } + + if (load_metadata_asynchronously) + { + load_metadata_main_thread = ThreadFromGlobalPool([this, need_to_load_metadata] { initializeImpl(need_to_load_metadata); }); + } + else + { + initializeImpl(need_to_load_metadata); + } + }); +} + +void FileCache::initializeImpl(bool load_metadata) { std::lock_guard lock(init_mutex); @@ -178,16 +222,10 @@ void FileCache::initialize() try { - if (fs::exists(getBasePath())) - { + if (load_metadata) loadMetadata(); - } - else - { - fs::create_directories(getBasePath()); - } - status_file = make_unique(fs::path(getBasePath()) / "status", StatusFile::write_full_info); + metadata.startup(); } catch (...) { @@ -196,8 +234,6 @@ void FileCache::initialize() throw; } - metadata.startup(); - if (keep_current_size_to_max_ratio != 1 || keep_current_elements_to_max_ratio != 1) { keep_up_free_space_ratio_task = Context::getGlobalContextInstance()->getSchedulePool().createTask(log->name(), [this] { freeSpaceRatioKeepingThreadFunc(); }); @@ -205,6 +241,7 @@ void FileCache::initialize() } is_initialized = true; + LOG_TEST(log, "Initialized cache from {}", metadata.getBaseDirectory()); } CachePriorityGuard::Lock FileCache::lockCache() const @@ -804,7 +841,8 @@ bool FileCache::tryReserve( const size_t size, FileCacheReserveStat & reserve_stat, const UserInfo & user, - size_t lock_wait_timeout_milliseconds) + size_t lock_wait_timeout_milliseconds, + std::string & failure_reason) { ProfileEventTimeIncrement watch(ProfileEvents::FilesystemCacheReserveMicroseconds); @@ -817,6 +855,7 @@ bool FileCache::tryReserve( if (cache_is_being_resized.load(std::memory_order_relaxed)) { ProfileEvents::increment(ProfileEvents::FilesystemCacheFailToReserveSpaceBecauseOfCacheResize); + failure_reason = "cache is being resized"; return false; } @@ -824,6 +863,7 @@ bool FileCache::tryReserve( if (!cache_lock) { ProfileEvents::increment(ProfileEvents::FilesystemCacheFailToReserveSpaceBecauseOfLockContention); + failure_reason = "cache contention"; return false; } @@ -847,6 +887,7 @@ bool FileCache::tryReserve( LOG_TEST(log, "Query limit exceeded, space reservation failed, " "recache_on_query_limit_exceeded is disabled (while reserving for {}:{})", file_segment.key(), file_segment.offset()); + failure_reason = "query limit exceeded"; return false; } @@ -877,6 +918,7 @@ bool FileCache::tryReserve( if (!query_priority->collectCandidatesForEviction( size, required_elements_num, reserve_stat, eviction_candidates, {}, user.user_id, cache_lock)) { + failure_reason = "cannot evict enough space for query limit"; return false; } @@ -891,11 +933,15 @@ bool FileCache::tryReserve( if (!main_priority->collectCandidatesForEviction( size, required_elements_num, reserve_stat, eviction_candidates, queue_iterator, user.user_id, cache_lock)) { + failure_reason = "cannot evict enough space"; return false; } if (!file_segment.getKeyMetadata()->createBaseDirectory()) + { + failure_reason = "not enough space on device"; return false; + } if (eviction_candidates.size() > 0) { @@ -1188,7 +1234,6 @@ void FileCache::loadMetadataImpl() std::vector loading_threads; std::exception_ptr first_exception; std::mutex set_exception_mutex; - std::atomic stop_loading = false; LOG_INFO(log, "Loading filesystem cache with {} threads from {}", load_metadata_threads, metadata.getBaseDirectory()); @@ -1198,7 +1243,7 @@ void FileCache::loadMetadataImpl() { loading_threads.emplace_back([&] { - while (!stop_loading) + while (!stop_loading_metadata) { try { @@ -1215,7 +1260,7 @@ void FileCache::loadMetadataImpl() if (!first_exception) first_exception = std::current_exception(); } - stop_loading = true; + stop_loading_metadata = true; return; } } @@ -1228,7 +1273,7 @@ void FileCache::loadMetadataImpl() if (!first_exception) first_exception = std::current_exception(); } - stop_loading = true; + stop_loading_metadata = true; break; } } @@ -1415,6 +1460,11 @@ FileCache::~FileCache() void FileCache::deactivateBackgroundOperations() { shutdown.store(true); + + stop_loading_metadata = true; + if (load_metadata_main_thread.joinable()) + load_metadata_main_thread.join(); + metadata.shutdown(); if (keep_up_free_space_ratio_task) keep_up_free_space_ratio_task->deactivate(); diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h index 07be802a940..8e8f01ff39e 100644 --- a/src/Interpreters/Cache/FileCache.h +++ b/src/Interpreters/Cache/FileCache.h @@ -8,6 +8,7 @@ #include +#include #include #include #include @@ -82,6 +83,9 @@ public: bool isInitialized() const; + /// Throws if `!load_metadata_asynchronously` and there is an exception in `init_exception` + void throwInitExceptionIfNeeded(); + const String & getBasePath() const; static Key createKeyForPath(const String & path); @@ -165,7 +169,8 @@ public: size_t size, FileCacheReserveStat & stat, const UserInfo & user, - size_t lock_wait_timeout_milliseconds); + size_t lock_wait_timeout_milliseconds, + std::string & failure_reason); std::vector getFileSegmentInfos(const UserID & user_id); @@ -198,6 +203,9 @@ private: const size_t bypass_cache_threshold; const size_t boundary_alignment; size_t load_metadata_threads; + const bool load_metadata_asynchronously; + std::atomic stop_loading_metadata = false; + ThreadFromGlobalPool load_metadata_main_thread; const bool write_cache_per_user_directory; BackgroundSchedulePool::TaskHolder keep_up_free_space_ratio_task; @@ -209,6 +217,7 @@ private: std::exception_ptr init_exception; std::atomic is_initialized = false; + OnceFlag initialize_called; mutable std::mutex init_mutex; std::unique_ptr status_file; std::atomic shutdown = false; @@ -246,6 +255,8 @@ private: */ FileCacheQueryLimitPtr query_limit; + void initializeImpl(bool load_metadata); + void assertInitialized() const; void assertCacheCorrectness(); diff --git a/src/Interpreters/Cache/FileCacheSettings.cpp b/src/Interpreters/Cache/FileCacheSettings.cpp index c68ff3183c6..e162d6b7551 100644 --- a/src/Interpreters/Cache/FileCacheSettings.cpp +++ b/src/Interpreters/Cache/FileCacheSettings.cpp @@ -65,6 +65,9 @@ void FileCacheSettings::loadImpl(FuncHas has, FuncGetUInt get_uint, FuncGetStrin if (has("load_metadata_threads")) load_metadata_threads = get_uint("load_metadata_threads"); + if (has("load_metadata_asynchronously")) + load_metadata_asynchronously = get_uint("load_metadata_asynchronously"); + if (boundary_alignment > max_file_segment_size) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Setting `boundary_alignment` cannot exceed `max_file_segment_size`"); diff --git a/src/Interpreters/Cache/FileCacheSettings.h b/src/Interpreters/Cache/FileCacheSettings.h index 93ded202947..72a2b6c3369 100644 --- a/src/Interpreters/Cache/FileCacheSettings.h +++ b/src/Interpreters/Cache/FileCacheSettings.h @@ -32,6 +32,7 @@ struct FileCacheSettings size_t background_download_queue_size_limit = FILECACHE_DEFAULT_BACKGROUND_DOWNLOAD_QUEUE_SIZE_LIMIT; size_t load_metadata_threads = FILECACHE_DEFAULT_LOAD_METADATA_THREADS; + bool load_metadata_asynchronously = false; bool write_cache_per_user_id_directory = false; diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index c46fb978ae4..cfbdfbaa257 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -502,7 +502,11 @@ LockedKeyPtr FileSegment::lockKeyMetadata(bool assert_exists) const return metadata->tryLock(); } -bool FileSegment::reserve(size_t size_to_reserve, size_t lock_wait_timeout_milliseconds, FileCacheReserveStat * reserve_stat) +bool FileSegment::reserve( + size_t size_to_reserve, + size_t lock_wait_timeout_milliseconds, + std::string & failure_reason, + FileCacheReserveStat * reserve_stat) { if (!size_to_reserve) throw Exception(ErrorCodes::LOGICAL_ERROR, "Zero space reservation is not allowed"); @@ -554,7 +558,7 @@ bool FileSegment::reserve(size_t size_to_reserve, size_t lock_wait_timeout_milli if (!reserve_stat) reserve_stat = &dummy_stat; - bool reserved = cache->tryReserve(*this, size_to_reserve, *reserve_stat, getKeyMetadata()->user, lock_wait_timeout_milliseconds); + bool reserved = cache->tryReserve(*this, size_to_reserve, *reserve_stat, getKeyMetadata()->user, lock_wait_timeout_milliseconds, failure_reason); if (!reserved) setDownloadFailedUnlocked(lock()); diff --git a/src/Interpreters/Cache/FileSegment.h b/src/Interpreters/Cache/FileSegment.h index 25ffb880b45..e90ebdbf8fe 100644 --- a/src/Interpreters/Cache/FileSegment.h +++ b/src/Interpreters/Cache/FileSegment.h @@ -201,7 +201,11 @@ public: /// Try to reserve exactly `size` bytes (in addition to the getDownloadedSize() bytes already downloaded). /// Returns true if reservation was successful, false otherwise. - bool reserve(size_t size_to_reserve, size_t lock_wait_timeout_milliseconds, FileCacheReserveStat * reserve_stat = nullptr); + bool reserve( + size_t size_to_reserve, + size_t lock_wait_timeout_milliseconds, + std::string & failure_reason, + FileCacheReserveStat * reserve_stat = nullptr); /// Write data into reserved space. void write(char * from, size_t size, size_t offset_in_file); diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index 7e4b76d3cc6..6399691bcf6 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -705,7 +705,8 @@ void CacheMetadata::downloadImpl(FileSegment & file_segment, std::optionalavailable(); - if (!file_segment.reserve(size, reserve_space_lock_wait_timeout_milliseconds)) + std::string failure_reason; + if (!file_segment.reserve(size, reserve_space_lock_wait_timeout_milliseconds, failure_reason)) { LOG_TEST( log, "Failed to reserve space during background download " diff --git a/src/Interpreters/Cache/QueryCache.cpp b/src/Interpreters/Cache/QueryCache.cpp index a3fe8c2e779..4312b35e18c 100644 --- a/src/Interpreters/Cache/QueryCache.cpp +++ b/src/Interpreters/Cache/QueryCache.cpp @@ -128,7 +128,7 @@ namespace bool isQueryCacheRelatedSetting(const String & setting_name) { - return setting_name.starts_with("query_cache_") || setting_name.ends_with("_query_cache"); + return (setting_name.starts_with("query_cache_") || setting_name.ends_with("_query_cache")) && setting_name != "query_cache_tag"; } class RemoveQueryCacheSettingsMatcher @@ -242,11 +242,18 @@ QueryCache::Key::Key( , expires_at(expires_at_) , is_compressed(is_compressed_) , query_string(queryStringFromAST(ast_)) + , tag(settings.query_cache_tag) { } -QueryCache::Key::Key(ASTPtr ast_, const String & current_database, const Settings & settings, std::optional user_id_, const std::vector & current_user_roles_) - : QueryCache::Key(ast_, current_database, settings, {}, user_id_, current_user_roles_, false, std::chrono::system_clock::from_time_t(1), false) /// dummy values for everything != AST, current database, user name/roles +QueryCache::Key::Key( + ASTPtr ast_, + const String & current_database, + const Settings & settings, + std::optional user_id_, + const std::vector & current_user_roles_) + : QueryCache::Key(ast_, current_database, settings, {}, user_id_, current_user_roles_, false, std::chrono::system_clock::from_time_t(1), false) + /// ^^ dummy values for everything != AST, current database, user name/roles { } @@ -612,9 +619,18 @@ QueryCache::Writer QueryCache::createWriter(const Key & key, std::chrono::millis return Writer(cache, key, max_entry_size_in_bytes, max_entry_size_in_rows, min_query_runtime, squash_partial_results, max_block_size); } -void QueryCache::clear() +void QueryCache::clear(const std::optional & tag) { - cache.clear(); + if (tag) + { + auto predicate = [tag](const Key & key, const Cache::MappedPtr &) { return key.tag == tag.value(); }; + cache.remove(predicate); + } + else + { + cache.clear(); + } + std::lock_guard lock(mutex); times_executed.clear(); } diff --git a/src/Interpreters/Cache/QueryCache.h b/src/Interpreters/Cache/QueryCache.h index 461197cac32..64407633a8d 100644 --- a/src/Interpreters/Cache/QueryCache.h +++ b/src/Interpreters/Cache/QueryCache.h @@ -88,6 +88,11 @@ public: /// SYSTEM.QUERY_CACHE. const String query_string; + /// A tag (namespace) for distinguish multiple entries of the same query. + /// This member has currently no use besides that SYSTEM.QUERY_CACHE can populate the 'tag' column conveniently without having to + /// compute the tag from the query AST. + const String tag; + /// Ctor to construct a Key for writing into query cache. Key(ASTPtr ast_, const String & current_database, @@ -99,7 +104,10 @@ public: bool is_compressed); /// Ctor to construct a Key for reading from query cache (this operation only needs the AST + user name). - Key(ASTPtr ast_, const String & current_database, const Settings & settings, std::optional user_id_, const std::vector & current_user_roles_); + Key(ASTPtr ast_, + const String & current_database, + const Settings & settings, + std::optional user_id_, const std::vector & current_user_roles_); bool operator==(const Key & other) const; }; @@ -203,7 +211,7 @@ public: Reader createReader(const Key & key); Writer createWriter(const Key & key, std::chrono::milliseconds min_query_runtime, bool squash_partial_results, size_t max_block_size, size_t max_query_cache_size_in_bytes_quota, size_t max_query_cache_entries_quota); - void clear(); + void clear(const std::optional & tag); size_t sizeInBytes() const; size_t count() const; diff --git a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp index e6ebf6ad50c..e43bbacdbc5 100644 --- a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp +++ b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp @@ -75,7 +75,8 @@ void WriteBufferToFileSegment::nextImpl() FileCacheReserveStat reserve_stat; /// In case of an error, we don't need to finalize the file segment /// because it will be deleted soon and completed in the holder's destructor. - bool ok = file_segment->reserve(bytes_to_write, reserve_space_lock_wait_timeout_milliseconds, &reserve_stat); + std::string failure_reason; + bool ok = file_segment->reserve(bytes_to_write, reserve_space_lock_wait_timeout_milliseconds, failure_reason, &reserve_stat); if (!ok) { @@ -84,9 +85,10 @@ void WriteBufferToFileSegment::nextImpl() reserve_stat_msg += fmt::format("{} hold {}, can release {}; ", toString(kind), ReadableSize(stat.non_releasable_size), ReadableSize(stat.releasable_size)); - throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Failed to reserve {} bytes for {}: {}(segment info: {})", + throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Failed to reserve {} bytes for {}: reason {}, {}(segment info: {})", bytes_to_write, file_segment->getKind() == FileSegmentKind::Temporary ? "temporary file" : "the file in cache", + failure_reason, reserve_stat_msg, file_segment->getInfoForLog() ); diff --git a/src/Interpreters/ClientInfo.cpp b/src/Interpreters/ClientInfo.cpp index 39fdef23baa..daf1e300046 100644 --- a/src/Interpreters/ClientInfo.cpp +++ b/src/Interpreters/ClientInfo.cpp @@ -254,6 +254,8 @@ String toString(ClientInfo::Interface interface) return "LOCAL"; case ClientInfo::Interface::TCP_INTERSERVER: return "TCP_INTERSERVER"; + case ClientInfo::Interface::PROMETHEUS: + return "PROMETHEUS"; } return std::format("Unknown server interface ({}).", static_cast(interface)); diff --git a/src/Interpreters/ClientInfo.h b/src/Interpreters/ClientInfo.h index ca32b4c5cfa..48dea3cc3ea 100644 --- a/src/Interpreters/ClientInfo.h +++ b/src/Interpreters/ClientInfo.h @@ -38,6 +38,7 @@ public: POSTGRESQL = 5, LOCAL = 6, TCP_INTERSERVER = 7, + PROMETHEUS = 8, }; enum class HTTPMethod : uint8_t diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp index 59c98491c14..7b7bedc850d 100644 --- a/src/Interpreters/Cluster.cpp +++ b/src/Interpreters/Cluster.cpp @@ -113,6 +113,9 @@ Cluster::Address::Address( secure = ConfigHelper::getBool(config, config_prefix + ".secure", false, /* empty_as */true) ? Protocol::Secure::Enable : Protocol::Secure::Disable; priority = Priority{config.getInt(config_prefix + ".priority", 1)}; + proto_send_chunked = config.getString(config_prefix + ".proto_caps.send", "notchunked"); + proto_recv_chunked = config.getString(config_prefix + ".proto_caps.recv", "notchunked"); + const char * port_type = secure == Protocol::Secure::Enable ? "tcp_port_secure" : "tcp_port"; auto default_port = config.getInt(port_type, 0); @@ -227,21 +230,37 @@ String Cluster::Address::toFullString(bool use_compact_format) const } } -Cluster::Address Cluster::Address::fromFullString(const String & full_string) +Cluster::Address Cluster::Address::fromFullString(std::string_view full_string) { - const char * address_begin = full_string.data(); - const char * address_end = address_begin + full_string.size(); - - const char * user_pw_end = strchr(full_string.data(), '@'); + std::string_view user_password; + if (auto pos = full_string.find('@'); pos != std::string_view::npos) + user_password = full_string.substr(pos + 1); /// parsing with the new shard{shard_index}[_replica{replica_index}] format - if (!user_pw_end && startsWith(full_string, "shard")) + if (user_password.empty() && full_string.starts_with("shard")) { - const char * underscore = strchr(full_string.data(), '_'); - Address address; - address.shard_index = parse(address_begin + strlen("shard")); - address.replica_index = underscore ? parse(underscore + strlen("_replica")) : 0; + + if (auto underscore_pos = full_string.find('_'); underscore_pos != std::string_view::npos) + { + address.shard_index = parse(full_string.substr(0, underscore_pos).substr(strlen("shard"))); + + if (full_string.substr(underscore_pos + 1).starts_with("replica")) + { + address.replica_index = parse(full_string.substr(underscore_pos + 1 + strlen("replica"))); + } + else if (full_string.substr(underscore_pos + 1).starts_with("all_replicas")) + { + address.replica_index = 0; + } + else + throw Exception(ErrorCodes::SYNTAX_ERROR, "Incorrect address '{}', should be in a form of `shardN_all_replicas` or `shardN_replicaM`", full_string); + } + else + { + address.shard_index = parse(full_string.substr(strlen("shard"))); + address.replica_index = 0; + } return address; } @@ -252,9 +271,13 @@ Cluster::Address Cluster::Address::fromFullString(const String & full_string) /// - credentials are exposed in file name; /// - the file name can be too long. + const char * address_begin = full_string.data(); + const char * address_end = address_begin + full_string.size(); + const char * user_pw_end = strchr(address_begin, '@'); + Protocol::Secure secure = Protocol::Secure::Disable; const char * secure_tag = "+secure"; - if (endsWith(full_string, secure_tag)) + if (full_string.ends_with(secure_tag)) { address_end -= strlen(secure_tag); secure = Protocol::Secure::Enable; @@ -425,7 +448,9 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config, auto pool = ConnectionPoolFactory::instance().get( static_cast(settings.distributed_connections_pool_size), address.host_name, address.port, - address.default_database, address.user, address.password, address.quota_key, + address.default_database, address.user, address.password, + address.proto_send_chunked, address.proto_recv_chunked, + address.quota_key, address.cluster, address.cluster_secret, "server", address.compression, address.secure, address.priority); @@ -589,6 +614,8 @@ void Cluster::addShard( replica.default_database, replica.user, replica.password, + replica.proto_send_chunked, + replica.proto_recv_chunked, replica.quota_key, replica.cluster, replica.cluster_secret, @@ -744,6 +771,8 @@ Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Setti address.default_database, address.user, address.password, + address.proto_send_chunked, + address.proto_recv_chunked, address.quota_key, address.cluster, address.cluster_secret, diff --git a/src/Interpreters/Cluster.h b/src/Interpreters/Cluster.h index dc5790ac339..82d77941b76 100644 --- a/src/Interpreters/Cluster.h +++ b/src/Interpreters/Cluster.h @@ -114,6 +114,8 @@ public: UInt16 port{0}; String user; String password; + String proto_send_chunked = "notchunked"; + String proto_recv_chunked = "notchunked"; String quota_key; /// For inter-server authorization @@ -166,7 +168,7 @@ public: String toFullString(bool use_compact_format) const; /// Returns address with only shard index and replica index or full address without shard index and replica index - static Address fromFullString(const String & address_full_string); + static Address fromFullString(std::string_view full_string); /// Returns resolved address if it does resolve. std::optional getResolvedAddress() const; diff --git a/src/Interpreters/ComparisonTupleEliminationVisitor.cpp b/src/Interpreters/ComparisonTupleEliminationVisitor.cpp index 4f06f345b96..b9f7f37b338 100644 --- a/src/Interpreters/ComparisonTupleEliminationVisitor.cpp +++ b/src/Interpreters/ComparisonTupleEliminationVisitor.cpp @@ -22,7 +22,7 @@ ASTs splitTuple(const ASTPtr & node) if (const auto * literal = node->as(); literal && literal->value.getType() == Field::Types::Tuple) { ASTs result; - const auto & tuple = literal->value.get(); + const auto & tuple = literal->value.safeGet(); for (const auto & child : tuple) result.emplace_back(std::make_shared(child)); return result; diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 5413b568068..cfcaf437510 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -99,6 +99,7 @@ #include #include #include +#include #include #include #include @@ -618,7 +619,7 @@ struct ContextSharedPart : boost::noncopyable /** After system_logs have been shut down it is guaranteed that no system table gets created or written to. * Note that part changes at shutdown won't be logged to part log. */ - SHUTDOWN(log, "system logs", system_logs, shutdown()); + SHUTDOWN(log, "system logs", system_logs, flushAndShutdown()); LOG_TRACE(log, "Shutting down database catalog"); DatabaseCatalog::shutdown(); @@ -2956,6 +2957,9 @@ ProgressCallback Context::getProgressCallback() const void Context::setProcessListElement(QueryStatusPtr elem) { + if (isGlobalContext()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have process list element"); + /// Set to a session or query. In the session, only one query is processed at a time. Therefore, the lock is not needed. process_list_elem = elem; has_process_list_elem = elem.get(); @@ -3224,12 +3228,12 @@ QueryCachePtr Context::getQueryCache() const return shared->query_cache; } -void Context::clearQueryCache() const +void Context::clearQueryCache(const std::optional & tag) const { std::lock_guard lock(shared->mutex); if (shared->query_cache) - shared->query_cache->clear(); + shared->query_cache->clear(tag); } void Context::clearCaches() const @@ -4255,7 +4259,7 @@ std::shared_ptr Context::getS3QueueLog() const if (!shared->system_logs) return {}; - return shared->system_logs->s3_queue_log; + return shared->system_logs->s3queue_log; } std::shared_ptr Context::getAzureQueueLog() const @@ -4312,13 +4316,13 @@ std::shared_ptr Context::getBlobStorageLog() const return shared->system_logs->blob_storage_log; } -std::vector Context::getSystemLogs() const +SystemLogs Context::getSystemLogs() const { SharedLockGuard lock(shared->mutex); if (!shared->system_logs) return {}; - return shared->system_logs->logs; + return *shared->system_logs; } std::optional Context::getDashboards() const diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index d5e35c3e4b3..e0c69471e60 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -48,6 +48,8 @@ namespace DB class ASTSelectQuery; +class SystemLogs; + struct ContextSharedPart; class ContextAccess; class ContextAccessWrapper; @@ -1066,7 +1068,7 @@ public: void setQueryCache(size_t max_size_in_bytes, size_t max_entries, size_t max_entry_size_in_bytes, size_t max_entry_size_in_rows); void updateQueryCacheConfiguration(const Poco::Util::AbstractConfiguration & config); std::shared_ptr getQueryCache() const; - void clearQueryCache() const; + void clearQueryCache(const std::optional & tag) const; /** Clear the caches of the uncompressed blocks and marks. * This is usually done when renaming tables, changing the type of columns, deleting a table. @@ -1150,7 +1152,7 @@ public: std::shared_ptr getBackupLog() const; std::shared_ptr getBlobStorageLog() const; - std::vector getSystemLogs() const; + SystemLogs getSystemLogs() const; using Dashboards = std::vector>; std::optional getDashboards() const; diff --git a/src/Interpreters/ConvertFunctionOrLikeVisitor.cpp b/src/Interpreters/ConvertFunctionOrLikeVisitor.cpp index 084bb0a1bb9..220355e0741 100644 --- a/src/Interpreters/ConvertFunctionOrLikeVisitor.cpp +++ b/src/Interpreters/ConvertFunctionOrLikeVisitor.cpp @@ -45,7 +45,7 @@ void ConvertFunctionOrLikeData::visit(ASTFunction & function, ASTPtr &) if (!identifier || !literal || literal->value.getType() != Field::Types::String) continue; - String regexp = likePatternToRegexp(literal->value.get()); + String regexp = likePatternToRegexp(literal->value.safeGet()); /// Case insensitive. Works with UTF-8 as well. if (is_ilike) regexp = "(?i)" + regexp; @@ -61,7 +61,7 @@ void ConvertFunctionOrLikeData::visit(ASTFunction & function, ASTPtr &) match->arguments->children.push_back(it->second); unique_elems.push_back(std::move(match)); } - it->second->value.get().push_back(regexp); + it->second->value.safeGet().push_back(regexp); } } diff --git a/src/Interpreters/ConvertStringsToEnumVisitor.cpp b/src/Interpreters/ConvertStringsToEnumVisitor.cpp index 7cc95dc521b..d35baa92900 100644 --- a/src/Interpreters/ConvertStringsToEnumVisitor.cpp +++ b/src/Interpreters/ConvertStringsToEnumVisitor.cpp @@ -33,8 +33,8 @@ String makeStringsEnum(const std::set & values) void changeIfArguments(ASTPtr & first, ASTPtr & second) { - String first_value = first->as()->value.get(); - String second_value = second->as()->value.get(); + String first_value = first->as()->value.safeGet(); + String second_value = second->as()->value.safeGet(); std::set values; values.insert(first_value); @@ -59,9 +59,9 @@ void changeTransformArguments(ASTPtr & array_to, ASTPtr & other) { std::set values; - for (const auto & item : array_to->as()->value.get()) - values.insert(item.get()); - values.insert(other->as()->value.get()); + for (const auto & item : array_to->as()->value.safeGet()) + values.insert(item.safeGet()); + values.insert(other->as()->value.safeGet()); String enum_string = makeStringsEnum(values); @@ -168,7 +168,7 @@ void ConvertStringsToEnumMatcher::visit(ASTFunction & function_node, Data & data if (literal_to->value.getTypeName() != "Array" || literal_other->value.getTypeName() != "String") return; - Array array_to = literal_to->value.get(); + Array array_to = literal_to->value.safeGet(); if (array_to.empty()) return; diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp index fa197d59c13..6e08dd5e2cc 100644 --- a/src/Interpreters/DDLTask.cpp +++ b/src/Interpreters/DDLTask.cpp @@ -538,7 +538,7 @@ void DatabaseReplicatedTask::createSyncedNodeIfNeed(const ZooKeeperPtr & zookeep /// Bool type is really weird, sometimes it's Bool and sometimes it's UInt64... assert(value.getType() == Field::Types::Bool || value.getType() == Field::Types::UInt64); - if (!value.get()) + if (!value.safeGet()) return; zookeeper->createIfNotExists(getSyncedNodePath(), ""); diff --git a/src/Interpreters/HashJoin/AddedColumns.cpp b/src/Interpreters/HashJoin/AddedColumns.cpp index 930a352744d..21cb6e401ed 100644 --- a/src/Interpreters/HashJoin/AddedColumns.cpp +++ b/src/Interpreters/HashJoin/AddedColumns.cpp @@ -15,48 +15,115 @@ JoinOnKeyColumns::JoinOnKeyColumns(const Block & block, const Names & key_names_ { } -template<> void AddedColumns::buildOutput() -{ -} +template<> +void AddedColumns::buildOutput() {} + +template<> +void AddedColumns::buildJoinGetOutput() {} + +template<> +template +void AddedColumns::buildOutputFromBlocks() {} template<> void AddedColumns::buildOutput() { - for (size_t i = 0; i < this->size(); ++i) + if (!output_by_row_list) + buildOutputFromBlocks(); + else { - auto& col = columns[i]; - size_t default_count = 0; - auto apply_default = [&]() + if (join_data_avg_perkey_rows < output_by_row_list_threshold) + buildOutputFromBlocks(); + else { - if (default_count > 0) + for (size_t i = 0; i < this->size(); ++i) { - JoinCommon::addDefaultValues(*col, type_name[i].type, default_count); - default_count = 0; - } - }; - - for (size_t j = 0; j < lazy_output.blocks.size(); ++j) - { - if (!lazy_output.blocks[j]) - { - default_count++; - continue; - } - apply_default(); - const auto & column_from_block = reinterpret_cast(lazy_output.blocks[j])->getByPosition(right_indexes[i]); - /// If it's joinGetOrNull, we need to wrap not-nullable columns in StorageJoin. - if (is_join_get) - { - if (auto * nullable_col = typeid_cast(col.get()); - nullable_col && !column_from_block.column->isNullable()) + auto & col = columns[i]; + for (auto row_ref_i : lazy_output.row_refs) { - nullable_col->insertFromNotNullable(*column_from_block.column, lazy_output.row_nums[j]); - continue; + if (row_ref_i) + { + const RowRefList * row_ref_list = reinterpret_cast(row_ref_i); + for (auto it = row_ref_list->begin(); it.ok(); ++it) + col->insertFrom(*it->block->getByPosition(right_indexes[i]).column, it->row_num); + } + else + type_name[i].type->insertDefaultInto(*col); } } - col->insertFrom(*column_from_block.column, lazy_output.row_nums[j]); } - apply_default(); + } +} + +template<> +void AddedColumns::buildJoinGetOutput() +{ + for (size_t i = 0; i < this->size(); ++i) + { + auto & col = columns[i]; + for (auto row_ref_i : lazy_output.row_refs) + { + if (!row_ref_i) + { + type_name[i].type->insertDefaultInto(*col); + continue; + } + const auto * row_ref = reinterpret_cast(row_ref_i); + const auto & column_from_block = row_ref->block->getByPosition(right_indexes[i]); + if (auto * nullable_col = typeid_cast(col.get()); nullable_col && !column_from_block.column->isNullable()) + nullable_col->insertFromNotNullable(*column_from_block.column, row_ref->row_num); + else + col->insertFrom(*column_from_block.column, row_ref->row_num); + } + } +} + +template<> +template +void AddedColumns::buildOutputFromBlocks() +{ + if (this->size() == 0) + return; + std::vector blocks; + std::vector row_nums; + blocks.reserve(lazy_output.row_refs.size()); + row_nums.reserve(lazy_output.row_refs.size()); + for (auto row_ref_i : lazy_output.row_refs) + { + if (row_ref_i) + { + if constexpr (from_row_list) + { + const RowRefList * row_ref_list = reinterpret_cast(row_ref_i); + for (auto it = row_ref_list->begin(); it.ok(); ++it) + { + blocks.emplace_back(it->block); + row_nums.emplace_back(it->row_num); + } + } + else + { + const RowRef * row_ref = reinterpret_cast(row_ref_i); + blocks.emplace_back(row_ref->block); + row_nums.emplace_back(row_ref->row_num); + } + } + else + { + blocks.emplace_back(nullptr); + row_nums.emplace_back(0); + } + } + for (size_t i = 0; i < this->size(); ++i) + { + auto & col = columns[i]; + for (size_t j = 0; j < blocks.size(); ++j) + { + if (blocks[j]) + col->insertFrom(*blocks[j]->getByPosition(right_indexes[i]).column, row_nums[j]); + else + type_name[i].type->insertDefaultInto(*col); + } } } @@ -72,29 +139,27 @@ void AddedColumns::applyLazyDefaults() } template<> -void AddedColumns::applyLazyDefaults() -{ -} +void AddedColumns::applyLazyDefaults() {} template <> -void AddedColumns::appendFromBlock(const Block & block, size_t row_num,const bool has_defaults) +void AddedColumns::appendFromBlock(const RowRef * row_ref, const bool has_defaults) { if (has_defaults) applyLazyDefaults(); #ifndef NDEBUG - checkBlock(block); + checkBlock(*row_ref->block); #endif if (is_join_get) { size_t right_indexes_size = right_indexes.size(); for (size_t j = 0; j < right_indexes_size; ++j) { - const auto & column_from_block = block.getByPosition(right_indexes[j]); + const auto & column_from_block = row_ref->block->getByPosition(right_indexes[j]); if (auto * nullable_col = nullable_column_ptrs[j]) - nullable_col->insertFromNotNullable(*column_from_block.column, row_num); + nullable_col->insertFromNotNullable(*column_from_block.column, row_ref->row_num); else - columns[j]->insertFrom(*column_from_block.column, row_num); + columns[j]->insertFrom(*column_from_block.column, row_ref->row_num); } } else @@ -102,22 +167,21 @@ void AddedColumns::appendFromBlock(const Block & block, size_t row_num,co size_t right_indexes_size = right_indexes.size(); for (size_t j = 0; j < right_indexes_size; ++j) { - const auto & column_from_block = block.getByPosition(right_indexes[j]); - columns[j]->insertFrom(*column_from_block.column, row_num); + const auto & column_from_block = row_ref->block->getByPosition(right_indexes[j]); + columns[j]->insertFrom(*column_from_block.column, row_ref->row_num); } } } template <> -void AddedColumns::appendFromBlock(const Block & block, size_t row_num, bool) +void AddedColumns::appendFromBlock(const RowRef * row_ref, bool) { #ifndef NDEBUG - checkBlock(block); + checkBlock(*row_ref->block); #endif if (has_columns_to_add) { - lazy_output.blocks.emplace_back(reinterpret_cast(&block)); - lazy_output.row_nums.emplace_back(static_cast(row_num)); + lazy_output.row_refs.emplace_back(reinterpret_cast(row_ref)); } } template<> @@ -131,8 +195,7 @@ void AddedColumns::appendDefaultRow() { if (has_columns_to_add) { - lazy_output.blocks.emplace_back(0); - lazy_output.row_nums.emplace_back(0); + lazy_output.row_refs.emplace_back(0); } } } diff --git a/src/Interpreters/HashJoin/AddedColumns.h b/src/Interpreters/HashJoin/AddedColumns.h index 13a7df6f498..f1b95a63be6 100644 --- a/src/Interpreters/HashJoin/AddedColumns.h +++ b/src/Interpreters/HashJoin/AddedColumns.h @@ -50,8 +50,7 @@ public: struct LazyOutput { - PaddedPODArray blocks; - PaddedPODArray row_nums; + PaddedPODArray row_refs; }; AddedColumns( @@ -76,8 +75,7 @@ public: if constexpr (lazy) { has_columns_to_add = num_columns_to_add > 0; - lazy_output.blocks.reserve(rows_to_add); - lazy_output.row_nums.reserve(rows_to_add); + lazy_output.row_refs.reserve(rows_to_add); } columns.reserve(num_columns_to_add); @@ -115,18 +113,22 @@ public: if (columns[j]->isNullable() && !saved_column->isNullable()) nullable_column_ptrs[j] = typeid_cast(columns[j].get()); } + join_data_avg_perkey_rows = join.getJoinedData()->avgPerKeyRows(); + output_by_row_list_threshold = join.getTableJoin().outputByRowListPerkeyRowsThreshold(); } size_t size() const { return columns.size(); } void buildOutput(); + void buildJoinGetOutput(); + ColumnWithTypeAndName moveColumn(size_t i) { return ColumnWithTypeAndName(std::move(columns[i]), type_name[i].type, type_name[i].qualified_name); } - void appendFromBlock(const Block & block, size_t row_num, bool has_default); + void appendFromBlock(const RowRef * row_ref, bool has_default); void appendDefaultRow(); @@ -134,6 +136,8 @@ public: const IColumn & leftAsofKey() const { return *left_asof_key; } + static constexpr bool isLazy() { return lazy; } + Block left_block; std::vector join_on_keys; ExpressionActionsPtr additional_filter_expression; @@ -142,6 +146,9 @@ public: size_t rows_to_add; std::unique_ptr offsets_to_replicate; bool need_filter = false; + bool output_by_row_list = false; + size_t join_data_avg_perkey_rows = 0; + size_t output_by_row_list_threshold = 0; IColumn::Filter filter; void reserve(bool need_replicate) @@ -212,15 +219,22 @@ private: columns.back()->reserve(src_column.column->size()); type_name.emplace_back(src_column.type, src_column.name, qualified_name); } + + /** Build output from the blocks that extract from `RowRef` or `RowRefList`, to avoid block cache miss which may cause performance slow down. + * And This problem would happen it we directly build output from `RowRef` or `RowRefList`. + */ + template + void buildOutputFromBlocks(); }; /// Adapter class to pass into addFoundRowAll /// In joinRightColumnsWithAdditionalFilter we don't want to add rows directly into AddedColumns, /// because they need to be filtered by additional_filter_expression. -class PreSelectedRows : public std::vector +class PreSelectedRows : public std::vector { public: - void appendFromBlock(const Block & block, size_t row_num, bool /* has_default */) { this->emplace_back(&block, row_num); } + void appendFromBlock(const RowRef * row_ref, bool /* has_default */) { this->emplace_back(row_ref); } + static constexpr bool isLazy() { return false; } }; } diff --git a/src/Interpreters/HashJoin/FullHashJoin.cpp b/src/Interpreters/HashJoin/FullHashJoin.cpp index 5d058d10fc2..4cdb2e757a4 100644 --- a/src/Interpreters/HashJoin/FullHashJoin.cpp +++ b/src/Interpreters/HashJoin/FullHashJoin.cpp @@ -1,4 +1,4 @@ -#include +#include namespace DB { diff --git a/src/Interpreters/HashJoin/HashJoin.cpp b/src/Interpreters/HashJoin/HashJoin.cpp index a621ce16fb1..9c07a71e614 100644 --- a/src/Interpreters/HashJoin/HashJoin.cpp +++ b/src/Interpreters/HashJoin/HashJoin.cpp @@ -291,12 +291,13 @@ void HashJoin::dataMapInit(MapsVariant & map) { if (kind == JoinKind::Cross) return; - joinDispatchInit(kind, strictness, map); - joinDispatch(kind, strictness, map, [&](auto, auto, auto & map_) { map_.create(data->type); }); + auto prefer_use_maps_all = table_join->getMixedJoinExpression() != nullptr; + joinDispatchInit(kind, strictness, map, prefer_use_maps_all); + joinDispatch(kind, strictness, map, prefer_use_maps_all, [&](auto, auto, auto & map_) { map_.create(data->type); }); if (reserve_num) { - joinDispatch(kind, strictness, map, [&](auto, auto, auto & map_) { map_.reserve(data->type, reserve_num); }); + joinDispatch(kind, strictness, map, prefer_use_maps_all, [&](auto, auto, auto & map_) { map_.reserve(data->type, reserve_num); }); } if (!data) @@ -327,9 +328,10 @@ size_t HashJoin::getTotalRowCount() const } else { + auto prefer_use_maps_all = table_join->getMixedJoinExpression() != nullptr; for (const auto & map : data->maps) { - joinDispatch(kind, strictness, map, [&](auto, auto, auto & map_) { res += map_.getTotalRowCount(data->type); }); + joinDispatch(kind, strictness, map, prefer_use_maps_all, [&](auto, auto, auto & map_) { res += map_.getTotalRowCount(data->type); }); } } @@ -367,9 +369,10 @@ size_t HashJoin::getTotalByteCount() const if (data->type != Type::CROSS) { + auto prefer_use_maps_all = table_join->getMixedJoinExpression() != nullptr; for (const auto & map : data->maps) { - joinDispatch(kind, strictness, map, [&](auto, auto, auto & map_) { res += map_.getTotalByteCountImpl(data->type); }); + joinDispatch(kind, strictness, map, prefer_use_maps_all, [&](auto, auto, auto & map_) { res += map_.getTotalByteCountImpl(data->type); }); } } return res; @@ -492,7 +495,7 @@ bool HashJoin::addBlockToJoin(const Block & source_block_, bool check_limits) } size_t rows = source_block.rows(); - + data->rows_to_join += rows; const auto & right_key_names = table_join->getAllNames(JoinTableSide::Right); ColumnPtrMap all_key_columns(right_key_names.size()); for (const auto & column_name : right_key_names) @@ -520,6 +523,8 @@ bool HashJoin::addBlockToJoin(const Block & source_block_, bool check_limits) return true; } + bool prefer_use_maps_all = table_join->getMixedJoinExpression() != nullptr; + size_t total_rows = 0; size_t total_bytes = 0; { @@ -592,7 +597,7 @@ bool HashJoin::addBlockToJoin(const Block & source_block_, bool check_limits) bool is_inserted = false; if (kind != JoinKind::Cross) { - joinDispatch(kind, strictness, data->maps[onexpr_idx], [&](auto kind_, auto strictness_, auto & map) + joinDispatch(kind, strictness, data->maps[onexpr_idx], prefer_use_maps_all, [&](auto kind_, auto strictness_, auto & map) { size_t size = HashJoinMethods>::insertFromBlockImpl( *this, @@ -608,10 +613,10 @@ bool HashJoin::addBlockToJoin(const Block & source_block_, bool check_limits) is_inserted); if (flag_per_row) - used_flags->reinit(stored_block); + used_flags->reinit, MapsAll>>(stored_block); else if (is_inserted) /// Number of buckets + 1 value from zero storage - used_flags->reinit(size + 1); + used_flags->reinit, MapsAll>>(size + 1); }); } @@ -642,7 +647,7 @@ bool HashJoin::addBlockToJoin(const Block & source_block_, bool check_limits) total_bytes = getTotalByteCount(); } } - + data->keys_to_join = total_rows; shrinkStoredBlocksToFit(total_bytes); return table_join->sizeLimits().check(total_rows, total_bytes, "JOIN", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED); @@ -873,7 +878,7 @@ ColumnWithTypeAndName HashJoin::joinGet(const Block & block, const Block & block keys.insert(std::move(key)); } - static_assert(!MapGetter::flagged, + static_assert(!MapGetter::flagged, "joinGet are not protected from hash table changes between block processing"); std::vector maps_vector; @@ -914,16 +919,34 @@ void HashJoin::joinBlock(Block & block, ExtraBlockPtr & not_processed) materializeBlockInplace(block); } + bool prefer_use_maps_all = table_join->getMixedJoinExpression() != nullptr; { std::vectormaps[0])> * > maps_vector; for (size_t i = 0; i < table_join->getClauses().size(); ++i) maps_vector.push_back(&data->maps[i]); - if (joinDispatch(kind, strictness, maps_vector, [&](auto kind_, auto strictness_, auto & maps_vector_) + if (joinDispatch(kind, strictness, maps_vector, prefer_use_maps_all, [&](auto kind_, auto strictness_, auto & maps_vector_) { - using MapType = typename MapGetter::Map; - Block remaining_block = HashJoinMethods::joinBlockImpl( - *this, block, sample_block_with_columns_to_add, maps_vector_); + Block remaining_block; + if constexpr (std::is_same_v, std::vector>) + { + remaining_block = HashJoinMethods::joinBlockImpl( + *this, block, sample_block_with_columns_to_add, maps_vector_); + } + else if constexpr (std::is_same_v, std::vector>) + { + remaining_block = HashJoinMethods::joinBlockImpl( + *this, block, sample_block_with_columns_to_add, maps_vector_); + } + else if constexpr (std::is_same_v, std::vector>) + { + remaining_block = HashJoinMethods::joinBlockImpl( + *this, block, sample_block_with_columns_to_add, maps_vector_); + } + else + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown maps type"); + } if (remaining_block.rows()) not_processed = std::make_shared(ExtraBlock{std::move(remaining_block)}); else @@ -1023,7 +1046,8 @@ public: rows_added = fillColumnsFromMap(map, columns_right); }; - if (!joinDispatch(parent.kind, parent.strictness, parent.data->maps.front(), fill_callback)) + bool prefer_use_maps_all = parent.table_join->getMixedJoinExpression() != nullptr; + if (!joinDispatch(parent.kind, parent.strictness, parent.data->maps.front(), prefer_use_maps_all, fill_callback)) throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown JOIN strictness '{}' (must be on of: ANY, ALL, ASOF)", parent.strictness); } @@ -1220,11 +1244,12 @@ void HashJoin::reuseJoinedData(const HashJoin & join) if (flag_per_row) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "StorageJoin with ORs is not supported"); + bool prefer_use_maps_all = join.table_join->getMixedJoinExpression() != nullptr; for (auto & map : data->maps) { - joinDispatch(kind, strictness, map, [this](auto kind_, auto strictness_, auto & map_) + joinDispatch(kind, strictness, map, prefer_use_maps_all, [this](auto kind_, auto strictness_, auto & map_) { - used_flags->reinit(map_.getBufferSizeInCells(data->type) + 1); + used_flags->reinit, MapsAll>>(map_.getBufferSizeInCells(data->type) + 1); }); } } @@ -1304,7 +1329,9 @@ void HashJoin::validateAdditionalFilterExpression(ExpressionActionsPtr additiona additional_filter_expression->dumpActions()); } - bool is_supported = (strictness == JoinStrictness::All) && (isInnerOrLeft(kind) || isRightOrFull(kind)); + bool is_supported = ((strictness == JoinStrictness::All) && (isInnerOrLeft(kind) || isRightOrFull(kind))) + || ((strictness == JoinStrictness::Semi || strictness == JoinStrictness::Any || strictness == JoinStrictness::Anti) + && (isLeft(kind) || isRight(kind))) || (strictness == JoinStrictness::Any && (isInner(kind))); if (!is_supported) { throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, diff --git a/src/Interpreters/HashJoin/HashJoin.h b/src/Interpreters/HashJoin/HashJoin.h index 00f5ef6d214..d645b8e9273 100644 --- a/src/Interpreters/HashJoin/HashJoin.h +++ b/src/Interpreters/HashJoin/HashJoin.h @@ -345,6 +345,18 @@ public: size_t blocks_allocated_size = 0; size_t blocks_nullmaps_allocated_size = 0; + + /// Number of rows of right table to join + size_t rows_to_join = 0; + /// Number of keys of right table to join + size_t keys_to_join = 0; + + size_t avgPerKeyRows() const + { + if (keys_to_join == 0) + return 0; + return rows_to_join / keys_to_join; + } }; using RightTableDataPtr = std::shared_ptr; diff --git a/src/Interpreters/HashJoin/HashJoinMethods.h b/src/Interpreters/HashJoin/HashJoinMethods.h index 0dfafa94efc..97ad57d26ea 100644 --- a/src/Interpreters/HashJoin/HashJoinMethods.h +++ b/src/Interpreters/HashJoin/HashJoinMethods.h @@ -12,15 +12,8 @@ #include #include - namespace DB { -namespace ErrorCodes -{ - extern const int UNSUPPORTED_JOIN_KEYS; - extern const int LOGICAL_ERROR; -} - /// Inserting an element into a hash table of the form `key -> reference to a string`, which will then be used by JOIN. template struct Inserter @@ -64,7 +57,6 @@ struct Inserter } }; - /// MapsTemplate is one of MapsOne, MapsAll and MapsAsof template class HashJoinMethods @@ -81,27 +73,7 @@ public: ConstNullMapPtr null_map, UInt8ColumnDataPtr join_mask, Arena & pool, - bool & is_inserted) - { - switch (type) - { - case HashJoin::Type::EMPTY: - [[fallthrough]]; - case HashJoin::Type::CROSS: - /// Do nothing. We will only save block, and it is enough - is_inserted = true; - return 0; - - #define M(TYPE) \ - case HashJoin::Type::TYPE: \ - return insertFromBlockImplTypeCase>::Type>(\ - join, *maps.TYPE, rows, key_columns, key_sizes, stored_block, null_map, join_mask, pool, is_inserted); \ - break; - - APPLY_FOR_JOIN_VARIANTS(M) - #undef M - } - } + bool & is_inserted); using MapsTemplateVector = std::vector; @@ -110,280 +82,37 @@ public: Block & block, const Block & block_with_columns_to_add, const MapsTemplateVector & maps_, - bool is_join_get = false) - { - constexpr JoinFeatures join_features; - - std::vector join_on_keys; - const auto & onexprs = join.table_join->getClauses(); - for (size_t i = 0; i < onexprs.size(); ++i) - { - const auto & key_names = !is_join_get ? onexprs[i].key_names_left : onexprs[i].key_names_right; - join_on_keys.emplace_back(block, key_names, onexprs[i].condColumnNames().first, join.key_sizes[i]); - } - size_t existing_columns = block.columns(); - - /** If you use FULL or RIGHT JOIN, then the columns from the "left" table must be materialized. - * Because if they are constants, then in the "not joined" rows, they may have different values - * - default values, which can differ from the values of these constants. - */ - if constexpr (join_features.right || join_features.full) - { - materializeBlockInplace(block); - } - - /** For LEFT/INNER JOIN, the saved blocks do not contain keys. - * For FULL/RIGHT JOIN, the saved blocks contain keys; - * but they will not be used at this stage of joining (and will be in `AdderNonJoined`), and they need to be skipped. - * For ASOF, the last column is used as the ASOF column - */ - AddedColumns added_columns( - block, - block_with_columns_to_add, - join.savedBlockSample(), - join, - std::move(join_on_keys), - join.table_join->getMixedJoinExpression(), - join_features.is_asof_join, - is_join_get); - - bool has_required_right_keys = (join.required_right_keys.columns() != 0); - added_columns.need_filter = join_features.need_filter || has_required_right_keys; - added_columns.max_joined_block_rows = join.max_joined_block_rows; - if (!added_columns.max_joined_block_rows) - added_columns.max_joined_block_rows = std::numeric_limits::max(); - else - added_columns.reserve(join_features.need_replication); - - size_t num_joined = switchJoinRightColumns(maps_, added_columns, join.data->type, *join.used_flags); - /// Do not hold memory for join_on_keys anymore - added_columns.join_on_keys.clear(); - Block remaining_block = sliceBlock(block, num_joined); - - added_columns.buildOutput(); - for (size_t i = 0; i < added_columns.size(); ++i) - block.insert(added_columns.moveColumn(i)); - - std::vector right_keys_to_replicate [[maybe_unused]]; - - if constexpr (join_features.need_filter) - { - /// If ANY INNER | RIGHT JOIN - filter all the columns except the new ones. - for (size_t i = 0; i < existing_columns; ++i) - block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->filter(added_columns.filter, -1); - - /// Add join key columns from right block if needed using value from left table because of equality - for (size_t i = 0; i < join.required_right_keys.columns(); ++i) - { - const auto & right_key = join.required_right_keys.getByPosition(i); - /// asof column is already in block. - if (join_features.is_asof_join && right_key.name == join.table_join->getOnlyClause().key_names_right.back()) - continue; - - const auto & left_column = block.getByName(join.required_right_keys_sources[i]); - const auto & right_col_name = join.getTableJoin().renamedRightColumnName(right_key.name); - auto right_col = copyLeftKeyColumnToRight(right_key.type, right_col_name, left_column); - block.insert(std::move(right_col)); - } - } - else if (has_required_right_keys) - { - /// Add join key columns from right block if needed. - for (size_t i = 0; i < join.required_right_keys.columns(); ++i) - { - const auto & right_key = join.required_right_keys.getByPosition(i); - auto right_col_name = join.getTableJoin().renamedRightColumnName(right_key.name); - /// asof column is already in block. - if (join_features.is_asof_join && right_key.name == join.table_join->getOnlyClause().key_names_right.back()) - continue; - - const auto & left_column = block.getByName(join.required_right_keys_sources[i]); - auto right_col = copyLeftKeyColumnToRight(right_key.type, right_col_name, left_column, &added_columns.filter); - block.insert(std::move(right_col)); - - if constexpr (join_features.need_replication) - right_keys_to_replicate.push_back(block.getPositionByName(right_col_name)); - } - } - - if constexpr (join_features.need_replication) - { - std::unique_ptr & offsets_to_replicate = added_columns.offsets_to_replicate; - - /// If ALL ... JOIN - we replicate all the columns except the new ones. - for (size_t i = 0; i < existing_columns; ++i) - { - block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->replicate(*offsets_to_replicate); - } - - /// Replicate additional right keys - for (size_t pos : right_keys_to_replicate) - { - block.safeGetByPosition(pos).column = block.safeGetByPosition(pos).column->replicate(*offsets_to_replicate); - } - } - - return remaining_block; - } + bool is_join_get = false); private: template - static KeyGetter createKeyGetter(const ColumnRawPtrs & key_columns, const Sizes & key_sizes) - { - if constexpr (is_asof_join) - { - auto key_column_copy = key_columns; - auto key_size_copy = key_sizes; - key_column_copy.pop_back(); - key_size_copy.pop_back(); - return KeyGetter(key_column_copy, key_size_copy, nullptr); - } - else - return KeyGetter(key_columns, key_sizes, nullptr); - } + static KeyGetter createKeyGetter(const ColumnRawPtrs & key_columns, const Sizes & key_sizes); template - static size_t NO_INLINE insertFromBlockImplTypeCase( + static size_t insertFromBlockImplTypeCase( HashJoin & join, HashMap & map, size_t rows, const ColumnRawPtrs & key_columns, - const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, UInt8ColumnDataPtr join_mask, Arena & pool, bool & is_inserted) - { - [[maybe_unused]] constexpr bool mapped_one = std::is_same_v; - constexpr bool is_asof_join = STRICTNESS == JoinStrictness::Asof; - - const IColumn * asof_column [[maybe_unused]] = nullptr; - if constexpr (is_asof_join) - asof_column = key_columns.back(); - - auto key_getter = createKeyGetter(key_columns, key_sizes); - - /// For ALL and ASOF join always insert values - is_inserted = !mapped_one || is_asof_join; - - for (size_t i = 0; i < rows; ++i) - { - if (null_map && (*null_map)[i]) - { - /// nulls are not inserted into hash table, - /// keep them for RIGHT and FULL joins - is_inserted = true; - continue; - } - - /// Check condition for right table from ON section - if (join_mask && !(*join_mask)[i]) - continue; - - if constexpr (is_asof_join) - Inserter::insertAsof(join, map, key_getter, stored_block, i, pool, *asof_column); - else if constexpr (mapped_one) - is_inserted |= Inserter::insertOne(join, map, key_getter, stored_block, i, pool); - else - Inserter::insertAll(join, map, key_getter, stored_block, i, pool); - } - return map.getBufferSizeInCells(); - } + const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, UInt8ColumnDataPtr join_mask, Arena & pool, bool & is_inserted); template static size_t switchJoinRightColumns( const std::vector & mapv, AddedColumns & added_columns, HashJoin::Type type, - JoinStuff::JoinUsedFlags & used_flags) - { - constexpr bool is_asof_join = STRICTNESS == JoinStrictness::Asof; - switch (type) - { - case HashJoin::Type::EMPTY: { - if constexpr (!is_asof_join) - { - using KeyGetter = KeyGetterEmpty; - std::vector key_getter_vector; - key_getter_vector.emplace_back(); - - using MapTypeVal = typename KeyGetter::MappedType; - std::vector a_map_type_vector; - a_map_type_vector.emplace_back(); - return joinRightColumnsSwitchNullability( - std::move(key_getter_vector), a_map_type_vector, added_columns, used_flags); - } - throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Unsupported JOIN keys. Type: {}", type); - } - #define M(TYPE) \ - case HashJoin::Type::TYPE: \ - { \ - using MapTypeVal = const typename std::remove_reference_t::element_type; \ - using KeyGetter = typename KeyGetterForType::Type; \ - std::vector a_map_type_vector(mapv.size()); \ - std::vector key_getter_vector; \ - for (size_t d = 0; d < added_columns.join_on_keys.size(); ++d) \ - { \ - const auto & join_on_key = added_columns.join_on_keys[d]; \ - a_map_type_vector[d] = mapv[d]->TYPE.get(); \ - key_getter_vector.push_back(std::move(createKeyGetter(join_on_key.key_columns, join_on_key.key_sizes))); \ - } \ - return joinRightColumnsSwitchNullability( \ - std::move(key_getter_vector), a_map_type_vector, added_columns, used_flags); \ - } - APPLY_FOR_JOIN_VARIANTS(M) - #undef M - - default: - throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Unsupported JOIN keys (type: {})", type); - } - } + JoinStuff::JoinUsedFlags & used_flags); template static size_t joinRightColumnsSwitchNullability( std::vector && key_getter_vector, const std::vector & mapv, AddedColumns & added_columns, - JoinStuff::JoinUsedFlags & used_flags) - { - if (added_columns.need_filter) - { - return joinRightColumnsSwitchMultipleDisjuncts( - std::forward>(key_getter_vector), mapv, added_columns, used_flags); - } - else - { - return joinRightColumnsSwitchMultipleDisjuncts( - std::forward>(key_getter_vector), mapv, added_columns, used_flags); - } - } + JoinStuff::JoinUsedFlags & used_flags); template static size_t joinRightColumnsSwitchMultipleDisjuncts( std::vector && key_getter_vector, const std::vector & mapv, AddedColumns & added_columns, - JoinStuff::JoinUsedFlags & used_flags) - { - constexpr JoinFeatures join_features; - if constexpr (join_features.is_all_join) - { - if (added_columns.additional_filter_expression) - { - bool mark_per_row_used = join_features.right || join_features.full || mapv.size() > 1; - return joinRightColumnsWithAddtitionalFilter( - std::forward>(key_getter_vector), - mapv, - added_columns, - used_flags, - need_filter, - join_features.need_flags, - join_features.add_missing, - mark_per_row_used); - } - } - - if (added_columns.additional_filter_expression) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Additional filter expression is not supported for this JOIN"); - - return mapv.size() > 1 ? joinRightColumns( - std::forward>(key_getter_vector), mapv, added_columns, used_flags) - : joinRightColumns( - std::forward>(key_getter_vector), mapv, added_columns, used_flags); - } + JoinStuff::JoinUsedFlags & used_flags); /// Joins right table columns which indexes are present in right_indexes using specified map. /// Makes filter (1 if row presented in right table) and returns offsets to replicate (for ALL JOINS). @@ -392,464 +121,30 @@ private: std::vector && key_getter_vector, const std::vector & mapv, AddedColumns & added_columns, - JoinStuff::JoinUsedFlags & used_flags) - { - constexpr JoinFeatures join_features; - - size_t rows = added_columns.rows_to_add; - if constexpr (need_filter) - added_columns.filter = IColumn::Filter(rows, 0); - - Arena pool; - - if constexpr (join_features.need_replication) - added_columns.offsets_to_replicate = std::make_unique(rows); - - IColumn::Offset current_offset = 0; - size_t max_joined_block_rows = added_columns.max_joined_block_rows; - size_t i = 0; - for (; i < rows; ++i) - { - if constexpr (join_features.need_replication) - { - if (unlikely(current_offset >= max_joined_block_rows)) - { - added_columns.offsets_to_replicate->resize_assume_reserved(i); - added_columns.filter.resize_assume_reserved(i); - break; - } - } - - bool right_row_found = false; - - KnownRowsHolder known_rows; - for (size_t onexpr_idx = 0; onexpr_idx < added_columns.join_on_keys.size(); ++onexpr_idx) - { - const auto & join_keys = added_columns.join_on_keys[onexpr_idx]; - if (join_keys.null_map && (*join_keys.null_map)[i]) - continue; - - bool row_acceptable = !join_keys.isRowFiltered(i); - using FindResult = typename KeyGetter::FindResult; - auto find_result = row_acceptable ? key_getter_vector[onexpr_idx].findKey(*(mapv[onexpr_idx]), i, pool) : FindResult(); - - if (find_result.isFound()) - { - right_row_found = true; - auto & mapped = find_result.getMapped(); - if constexpr (join_features.is_asof_join) - { - const IColumn & left_asof_key = added_columns.leftAsofKey(); - - auto row_ref = mapped->findAsof(left_asof_key, i); - if (row_ref.block) - { - setUsed(added_columns.filter, i); - if constexpr (flag_per_row) - used_flags.template setUsed(row_ref.block, row_ref.row_num, 0); - else - used_flags.template setUsed(find_result); - - added_columns.appendFromBlock(*row_ref.block, row_ref.row_num, join_features.add_missing); - } - else - addNotFoundRow(added_columns, current_offset); - } - else if constexpr (join_features.is_all_join) - { - setUsed(added_columns.filter, i); - used_flags.template setUsed(find_result); - auto used_flags_opt = join_features.need_flags ? &used_flags : nullptr; - addFoundRowAll(mapped, added_columns, current_offset, known_rows, used_flags_opt); - } - else if constexpr ((join_features.is_any_join || join_features.is_semi_join) && join_features.right) - { - /// Use first appeared left key + it needs left columns replication - bool used_once = used_flags.template setUsedOnce(find_result); - if (used_once) - { - auto used_flags_opt = join_features.need_flags ? &used_flags : nullptr; - setUsed(added_columns.filter, i); - addFoundRowAll( - mapped, added_columns, current_offset, known_rows, used_flags_opt); - } - } - else if constexpr (join_features.is_any_join && KIND == JoinKind::Inner) - { - bool used_once = used_flags.template setUsedOnce(find_result); - - /// Use first appeared left key only - if (used_once) - { - setUsed(added_columns.filter, i); - added_columns.appendFromBlock(*mapped.block, mapped.row_num, join_features.add_missing); - } - - break; - } - else if constexpr (join_features.is_any_join && join_features.full) - { - /// TODO - } - else if constexpr (join_features.is_anti_join) - { - if constexpr (join_features.right && join_features.need_flags) - used_flags.template setUsed(find_result); - } - else /// ANY LEFT, SEMI LEFT, old ANY (RightAny) - { - setUsed(added_columns.filter, i); - used_flags.template setUsed(find_result); - added_columns.appendFromBlock(*mapped.block, mapped.row_num, join_features.add_missing); - - if (join_features.is_any_or_semi_join) - { - break; - } - } - } - } - - if (!right_row_found) - { - if constexpr (join_features.is_anti_join && join_features.left) - setUsed(added_columns.filter, i); - addNotFoundRow(added_columns, current_offset); - } - - if constexpr (join_features.need_replication) - { - (*added_columns.offsets_to_replicate)[i] = current_offset; - } - } - - added_columns.applyLazyDefaults(); - return i; - } + JoinStuff::JoinUsedFlags & used_flags); template - static void setUsed(IColumn::Filter & filter [[maybe_unused]], size_t pos [[maybe_unused]]) - { - if constexpr (need_filter) - filter[pos] = 1; - } + static void setUsed(IColumn::Filter & filter [[maybe_unused]], size_t pos [[maybe_unused]]); template static ColumnPtr buildAdditionalFilter( size_t left_start_row, - const std::vector & selected_rows, + const std::vector & selected_rows, const std::vector & row_replicate_offset, - AddedColumns & added_columns) - { - ColumnPtr result_column; - do - { - if (selected_rows.empty()) - { - result_column = ColumnUInt8::create(); - break; - } - const Block & sample_right_block = *selected_rows.begin()->block; - if (!sample_right_block || !added_columns.additional_filter_expression) - { - auto filter = ColumnUInt8::create(); - filter->insertMany(1, selected_rows.size()); - result_column = std::move(filter); - break; - } - - auto required_cols = added_columns.additional_filter_expression->getRequiredColumnsWithTypes(); - if (required_cols.empty()) - { - Block block; - added_columns.additional_filter_expression->execute(block); - result_column = block.getByPosition(0).column->cloneResized(selected_rows.size()); - break; - } - NameSet required_column_names; - for (auto & col : required_cols) - required_column_names.insert(col.name); - - Block executed_block; - size_t right_col_pos = 0; - for (const auto & col : sample_right_block.getColumnsWithTypeAndName()) - { - if (required_column_names.contains(col.name)) - { - auto new_col = col.column->cloneEmpty(); - for (const auto & selected_row : selected_rows) - { - const auto & src_col = selected_row.block->getByPosition(right_col_pos); - new_col->insertFrom(*src_col.column, selected_row.row_num); - } - executed_block.insert({std::move(new_col), col.type, col.name}); - } - right_col_pos += 1; - } - if (!executed_block) - { - result_column = ColumnUInt8::create(); - break; - } - - for (const auto & col_name : required_column_names) - { - const auto * src_col = added_columns.left_block.findByName(col_name); - if (!src_col) - continue; - auto new_col = src_col->column->cloneEmpty(); - size_t prev_left_offset = 0; - for (size_t i = 1; i < row_replicate_offset.size(); ++i) - { - const size_t & left_offset = row_replicate_offset[i]; - size_t rows = left_offset - prev_left_offset; - if (rows) - new_col->insertManyFrom(*src_col->column, left_start_row + i - 1, rows); - prev_left_offset = left_offset; - } - executed_block.insert({std::move(new_col), src_col->type, col_name}); - } - if (!executed_block) - { - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "required columns: [{}], but not found any in left/right table. right table: {}, left table: {}", - required_cols.toString(), - sample_right_block.dumpNames(), - added_columns.left_block.dumpNames()); - } - - for (const auto & col : executed_block.getColumnsWithTypeAndName()) - if (!col.column || !col.type) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Illegal nullptr column in input block: {}", executed_block.dumpStructure()); - - added_columns.additional_filter_expression->execute(executed_block); - result_column = executed_block.getByPosition(0).column->convertToFullColumnIfConst(); - executed_block.clear(); - } while (false); - - result_column = result_column->convertToFullIfNeeded(); - if (result_column->isNullable()) - { - /// Convert Nullable(UInt8) to UInt8 ensuring that nulls are zeros - /// Trying to avoid copying data, since we are the only owner of the column. - ColumnPtr mask_column = assert_cast(*result_column).getNullMapColumnPtr(); - - MutableColumnPtr mutable_column; - { - ColumnPtr nested_column = assert_cast(*result_column).getNestedColumnPtr(); - result_column.reset(); - mutable_column = IColumn::mutate(std::move(nested_column)); - } - - auto & column_data = assert_cast(*mutable_column).getData(); - const auto & mask_column_data = assert_cast(*mask_column).getData(); - for (size_t i = 0; i < column_data.size(); ++i) - { - if (mask_column_data[i]) - column_data[i] = 0; - } - return mutable_column; - } - return result_column; - } + AddedColumns & added_columns); /// First to collect all matched rows refs by join keys, then filter out rows which are not true in additional filter expression. - template + template static size_t joinRightColumnsWithAddtitionalFilter( std::vector && key_getter_vector, const std::vector & mapv, AddedColumns & added_columns, JoinStuff::JoinUsedFlags & used_flags [[maybe_unused]], bool need_filter [[maybe_unused]], - bool need_flags [[maybe_unused]], - bool add_missing [[maybe_unused]], - bool flag_per_row [[maybe_unused]]) - { - size_t left_block_rows = added_columns.rows_to_add; - if (need_filter) - added_columns.filter = IColumn::Filter(left_block_rows, 0); - - std::unique_ptr pool; - - if constexpr (need_replication) - added_columns.offsets_to_replicate = std::make_unique(left_block_rows); - - std::vector row_replicate_offset; - row_replicate_offset.reserve(left_block_rows); - - using FindResult = typename KeyGetter::FindResult; - size_t max_joined_block_rows = added_columns.max_joined_block_rows; - size_t left_row_iter = 0; - PreSelectedRows selected_rows; - selected_rows.reserve(left_block_rows); - std::vector find_results; - find_results.reserve(left_block_rows); - bool exceeded_max_block_rows = false; - IColumn::Offset total_added_rows = 0; - IColumn::Offset current_added_rows = 0; - - auto collect_keys_matched_rows_refs = [&]() - { - pool = std::make_unique(); - find_results.clear(); - row_replicate_offset.clear(); - row_replicate_offset.push_back(0); - current_added_rows = 0; - selected_rows.clear(); - for (; left_row_iter < left_block_rows; ++left_row_iter) - { - if constexpr (need_replication) - { - if (unlikely(total_added_rows + current_added_rows >= max_joined_block_rows)) - { - break; - } - } - KnownRowsHolder all_flag_known_rows; - KnownRowsHolder single_flag_know_rows; - for (size_t join_clause_idx = 0; join_clause_idx < added_columns.join_on_keys.size(); ++join_clause_idx) - { - const auto & join_keys = added_columns.join_on_keys[join_clause_idx]; - if (join_keys.null_map && (*join_keys.null_map)[left_row_iter]) - continue; - - bool row_acceptable = !join_keys.isRowFiltered(left_row_iter); - auto find_result = row_acceptable - ? key_getter_vector[join_clause_idx].findKey(*(mapv[join_clause_idx]), left_row_iter, *pool) - : FindResult(); - - if (find_result.isFound()) - { - auto & mapped = find_result.getMapped(); - find_results.push_back(find_result); - if (flag_per_row) - addFoundRowAll(mapped, selected_rows, current_added_rows, all_flag_known_rows, nullptr); - else - addFoundRowAll(mapped, selected_rows, current_added_rows, single_flag_know_rows, nullptr); - } - } - row_replicate_offset.push_back(current_added_rows); - } - }; - - auto copy_final_matched_rows = [&](size_t left_start_row, ColumnPtr filter_col) - { - const PaddedPODArray & filter_flags = assert_cast(*filter_col).getData(); - - size_t prev_replicated_row = 0; - auto selected_right_row_it = selected_rows.begin(); - size_t find_result_index = 0; - for (size_t i = 1, n = row_replicate_offset.size(); i < n; ++i) - { - bool any_matched = false; - /// For all right join, flag_per_row is true, we need mark used flags for each row. - if (flag_per_row) - { - for (size_t replicated_row = prev_replicated_row; replicated_row < row_replicate_offset[i]; ++replicated_row) - { - if (filter_flags[replicated_row]) - { - any_matched = true; - added_columns.appendFromBlock(*selected_right_row_it->block, selected_right_row_it->row_num, add_missing); - total_added_rows += 1; - if (need_flags) - used_flags.template setUsed(selected_right_row_it->block, selected_right_row_it->row_num, 0); - } - ++selected_right_row_it; - } - } - else - { - for (size_t replicated_row = prev_replicated_row; replicated_row < row_replicate_offset[i]; ++replicated_row) - { - if (filter_flags[replicated_row]) - { - any_matched = true; - added_columns.appendFromBlock(*selected_right_row_it->block, selected_right_row_it->row_num, add_missing); - total_added_rows += 1; - } - ++selected_right_row_it; - } - } - if (!any_matched) - { - if (add_missing) - addNotFoundRow(added_columns, total_added_rows); - else - addNotFoundRow(added_columns, total_added_rows); - } - else - { - if (!flag_per_row && need_flags) - used_flags.template setUsed(find_results[find_result_index]); - if (need_filter) - setUsed(added_columns.filter, left_start_row + i - 1); - if (add_missing) - added_columns.applyLazyDefaults(); - } - find_result_index += (prev_replicated_row != row_replicate_offset[i]); - - if constexpr (need_replication) - { - (*added_columns.offsets_to_replicate)[left_start_row + i - 1] = total_added_rows; - } - prev_replicated_row = row_replicate_offset[i]; - } - }; - - while (left_row_iter < left_block_rows && !exceeded_max_block_rows) - { - auto left_start_row = left_row_iter; - collect_keys_matched_rows_refs(); - if (selected_rows.size() != current_added_rows || row_replicate_offset.size() != left_row_iter - left_start_row + 1) - { - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Sizes are mismatched. selected_rows.size:{}, current_added_rows:{}, row_replicate_offset.size:{}, left_row_iter: {}, " - "left_start_row: {}", - selected_rows.size(), - current_added_rows, - row_replicate_offset.size(), - left_row_iter, - left_start_row); - } - auto filter_col = buildAdditionalFilter(left_start_row, selected_rows, row_replicate_offset, added_columns); - copy_final_matched_rows(left_start_row, filter_col); - - if constexpr (need_replication) - { - // Add a check for current_added_rows to avoid run the filter expression on too small size batch. - if (total_added_rows >= max_joined_block_rows || current_added_rows < 1024) - exceeded_max_block_rows = true; - } - } - - if constexpr (need_replication) - { - added_columns.offsets_to_replicate->resize_assume_reserved(left_row_iter); - added_columns.filter.resize_assume_reserved(left_row_iter); - } - added_columns.applyLazyDefaults(); - return left_row_iter; - } + bool flag_per_row [[maybe_unused]]); /// Cut first num_rows rows from block in place and returns block with remaining rows - static Block sliceBlock(Block & block, size_t num_rows) - { - size_t total_rows = block.rows(); - if (num_rows >= total_rows) - return {}; - size_t remaining_rows = total_rows - num_rows; - Block remaining_block = block.cloneEmpty(); - for (size_t i = 0; i < block.columns(); ++i) - { - auto & col = block.getByPosition(i); - remaining_block.getByPosition(i).column = col.column->cut(num_rows, remaining_rows); - col.column = col.column->cut(0, num_rows); - } - return remaining_block; - } + static Block sliceBlock(Block & block, size_t num_rows); /** Since we do not store right key columns, * this function is used to copy left key columns to right key columns. @@ -864,70 +159,22 @@ private: const DataTypePtr & right_key_type, const String & renamed_right_column, const ColumnWithTypeAndName & left_column, - const IColumn::Filter * null_map_filter = nullptr) - { - ColumnWithTypeAndName right_column = left_column; - right_column.name = renamed_right_column; + const IColumn::Filter * null_map_filter = nullptr); - if (null_map_filter) - right_column.column = JoinCommon::filterWithBlanks(right_column.column, *null_map_filter); + static void correctNullabilityInplace(ColumnWithTypeAndName & column, bool nullable); - bool should_be_nullable = isNullableOrLowCardinalityNullable(right_key_type); - if (null_map_filter) - correctNullabilityInplace(right_column, should_be_nullable, *null_map_filter); - else - correctNullabilityInplace(right_column, should_be_nullable); - - if (!right_column.type->equals(*right_key_type)) - { - right_column.column = castColumnAccurate(right_column, right_key_type); - right_column.type = right_key_type; - } - - right_column.column = right_column.column->convertToFullColumnIfConst(); - return right_column; - } - - static void correctNullabilityInplace(ColumnWithTypeAndName & column, bool nullable) - { - if (nullable) - { - JoinCommon::convertColumnToNullable(column); - } - else - { - /// We have to replace values masked by NULLs with defaults. - if (column.column) - if (const auto * nullable_column = checkAndGetColumn(&*column.column)) - column.column = JoinCommon::filterWithBlanks(column.column, nullable_column->getNullMapColumn().getData(), true); - - JoinCommon::removeColumnNullability(column); - } - } - - static void correctNullabilityInplace(ColumnWithTypeAndName & column, bool nullable, const IColumn::Filter & negative_null_map) - { - if (nullable) - { - JoinCommon::convertColumnToNullable(column); - if (column.type->isNullable() && !negative_null_map.empty()) - { - MutableColumnPtr mutable_column = IColumn::mutate(std::move(column.column)); - assert_cast(*mutable_column).applyNegatedNullMap(negative_null_map); - column.column = std::move(mutable_column); - } - } - else - JoinCommon::removeColumnNullability(column); - } + static void correctNullabilityInplace(ColumnWithTypeAndName & column, bool nullable, const IColumn::Filter & negative_null_map); }; /// Instantiate template class ahead in different .cpp files to avoid `too large translation unit`. extern template class HashJoinMethods; extern template class HashJoinMethods; +extern template class HashJoinMethods; extern template class HashJoinMethods; extern template class HashJoinMethods; +extern template class HashJoinMethods; extern template class HashJoinMethods; +extern template class HashJoinMethods; extern template class HashJoinMethods; extern template class HashJoinMethods; @@ -939,6 +186,7 @@ extern template class HashJoinMethods; extern template class HashJoinMethods; +extern template class HashJoinMethods; extern template class HashJoinMethods; extern template class HashJoinMethods; extern template class HashJoinMethods; diff --git a/src/Interpreters/HashJoin/HashJoinMethodsImpl.h b/src/Interpreters/HashJoin/HashJoinMethodsImpl.h new file mode 100644 index 00000000000..320c8851ce4 --- /dev/null +++ b/src/Interpreters/HashJoin/HashJoinMethodsImpl.h @@ -0,0 +1,935 @@ +#pragma once +#include + +namespace DB +{ +namespace ErrorCodes +{ +extern const int UNSUPPORTED_JOIN_KEYS; +extern const int LOGICAL_ERROR; +} +template +size_t HashJoinMethods::insertFromBlockImpl( + HashJoin & join, + HashJoin::Type type, + MapsTemplate & maps, + size_t rows, + const ColumnRawPtrs & key_columns, + const Sizes & key_sizes, + Block * stored_block, + ConstNullMapPtr null_map, + UInt8ColumnDataPtr join_mask, + Arena & pool, + bool & is_inserted) +{ + switch (type) + { + case HashJoin::Type::EMPTY: + [[fallthrough]]; + case HashJoin::Type::CROSS: + /// Do nothing. We will only save block, and it is enough + is_inserted = true; + return 0; + +#define M(TYPE) \ + case HashJoin::Type::TYPE: \ + return insertFromBlockImplTypeCase< \ + typename KeyGetterForType>::Type>( \ + join, *maps.TYPE, rows, key_columns, key_sizes, stored_block, null_map, join_mask, pool, is_inserted); \ + break; + + APPLY_FOR_JOIN_VARIANTS(M) +#undef M + } +} + +template +Block HashJoinMethods::joinBlockImpl( + const HashJoin & join, Block & block, const Block & block_with_columns_to_add, const MapsTemplateVector & maps_, bool is_join_get) +{ + constexpr JoinFeatures join_features; + + std::vector join_on_keys; + const auto & onexprs = join.table_join->getClauses(); + for (size_t i = 0; i < onexprs.size(); ++i) + { + const auto & key_names = !is_join_get ? onexprs[i].key_names_left : onexprs[i].key_names_right; + join_on_keys.emplace_back(block, key_names, onexprs[i].condColumnNames().first, join.key_sizes[i]); + } + size_t existing_columns = block.columns(); + + /** If you use FULL or RIGHT JOIN, then the columns from the "left" table must be materialized. + * Because if they are constants, then in the "not joined" rows, they may have different values + * - default values, which can differ from the values of these constants. + */ + if constexpr (join_features.right || join_features.full) + { + materializeBlockInplace(block); + } + + /** For LEFT/INNER JOIN, the saved blocks do not contain keys. + * For FULL/RIGHT JOIN, the saved blocks contain keys; + * but they will not be used at this stage of joining (and will be in `AdderNonJoined`), and they need to be skipped. + * For ASOF, the last column is used as the ASOF column + */ + AddedColumns added_columns( + block, + block_with_columns_to_add, + join.savedBlockSample(), + join, + std::move(join_on_keys), + join.table_join->getMixedJoinExpression(), + join_features.is_asof_join, + is_join_get); + + bool has_required_right_keys = (join.required_right_keys.columns() != 0); + added_columns.need_filter = join_features.need_filter || has_required_right_keys; + added_columns.max_joined_block_rows = join.max_joined_block_rows; + if (!added_columns.max_joined_block_rows) + added_columns.max_joined_block_rows = std::numeric_limits::max(); + else + added_columns.reserve(join_features.need_replication); + + size_t num_joined = switchJoinRightColumns(maps_, added_columns, join.data->type, *join.used_flags); + /// Do not hold memory for join_on_keys anymore + added_columns.join_on_keys.clear(); + Block remaining_block = sliceBlock(block, num_joined); + + if (is_join_get) + added_columns.buildJoinGetOutput(); + else + added_columns.buildOutput(); + for (size_t i = 0; i < added_columns.size(); ++i) + block.insert(added_columns.moveColumn(i)); + + std::vector right_keys_to_replicate [[maybe_unused]]; + + if constexpr (join_features.need_filter) + { + /// If ANY INNER | RIGHT JOIN - filter all the columns except the new ones. + for (size_t i = 0; i < existing_columns; ++i) + block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->filter(added_columns.filter, -1); + + /// Add join key columns from right block if needed using value from left table because of equality + for (size_t i = 0; i < join.required_right_keys.columns(); ++i) + { + const auto & right_key = join.required_right_keys.getByPosition(i); + /// asof column is already in block. + if (join_features.is_asof_join && right_key.name == join.table_join->getOnlyClause().key_names_right.back()) + continue; + + const auto & left_column = block.getByName(join.required_right_keys_sources[i]); + const auto & right_col_name = join.getTableJoin().renamedRightColumnName(right_key.name); + auto right_col = copyLeftKeyColumnToRight(right_key.type, right_col_name, left_column); + block.insert(std::move(right_col)); + } + } + else if (has_required_right_keys) + { + /// Add join key columns from right block if needed. + for (size_t i = 0; i < join.required_right_keys.columns(); ++i) + { + const auto & right_key = join.required_right_keys.getByPosition(i); + auto right_col_name = join.getTableJoin().renamedRightColumnName(right_key.name); + /// asof column is already in block. + if (join_features.is_asof_join && right_key.name == join.table_join->getOnlyClause().key_names_right.back()) + continue; + + const auto & left_column = block.getByName(join.required_right_keys_sources[i]); + auto right_col = copyLeftKeyColumnToRight(right_key.type, right_col_name, left_column, &added_columns.filter); + block.insert(std::move(right_col)); + + if constexpr (join_features.need_replication) + right_keys_to_replicate.push_back(block.getPositionByName(right_col_name)); + } + } + + if constexpr (join_features.need_replication) + { + std::unique_ptr & offsets_to_replicate = added_columns.offsets_to_replicate; + + /// If ALL ... JOIN - we replicate all the columns except the new ones. + for (size_t i = 0; i < existing_columns; ++i) + { + block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->replicate(*offsets_to_replicate); + } + + /// Replicate additional right keys + for (size_t pos : right_keys_to_replicate) + { + block.safeGetByPosition(pos).column = block.safeGetByPosition(pos).column->replicate(*offsets_to_replicate); + } + } + return remaining_block; +} + +template +template +KeyGetter HashJoinMethods::createKeyGetter(const ColumnRawPtrs & key_columns, const Sizes & key_sizes) +{ + if constexpr (is_asof_join) + { + auto key_column_copy = key_columns; + auto key_size_copy = key_sizes; + key_column_copy.pop_back(); + key_size_copy.pop_back(); + return KeyGetter(key_column_copy, key_size_copy, nullptr); + } + else + return KeyGetter(key_columns, key_sizes, nullptr); +} + +template +template +size_t HashJoinMethods::insertFromBlockImplTypeCase( + HashJoin & join, + HashMap & map, + size_t rows, + const ColumnRawPtrs & key_columns, + const Sizes & key_sizes, + Block * stored_block, + ConstNullMapPtr null_map, + UInt8ColumnDataPtr join_mask, + Arena & pool, + bool & is_inserted) +{ + [[maybe_unused]] constexpr bool mapped_one = std::is_same_v; + constexpr bool is_asof_join = STRICTNESS == JoinStrictness::Asof; + + const IColumn * asof_column [[maybe_unused]] = nullptr; + if constexpr (is_asof_join) + asof_column = key_columns.back(); + + auto key_getter = createKeyGetter(key_columns, key_sizes); + + /// For ALL and ASOF join always insert values + is_inserted = !mapped_one || is_asof_join; + + for (size_t i = 0; i < rows; ++i) + { + if (null_map && (*null_map)[i]) + { + /// nulls are not inserted into hash table, + /// keep them for RIGHT and FULL joins + is_inserted = true; + continue; + } + + /// Check condition for right table from ON section + if (join_mask && !(*join_mask)[i]) + continue; + + if constexpr (is_asof_join) + Inserter::insertAsof(join, map, key_getter, stored_block, i, pool, *asof_column); + else if constexpr (mapped_one) + is_inserted |= Inserter::insertOne(join, map, key_getter, stored_block, i, pool); + else + Inserter::insertAll(join, map, key_getter, stored_block, i, pool); + } + return map.getBufferSizeInCells(); +} + +template +template +size_t HashJoinMethods::switchJoinRightColumns( + const std::vector & mapv, + AddedColumns & added_columns, + HashJoin::Type type, + JoinStuff::JoinUsedFlags & used_flags) +{ + constexpr bool is_asof_join = STRICTNESS == JoinStrictness::Asof; + switch (type) + { + case HashJoin::Type::EMPTY: { + if constexpr (!is_asof_join) + { + using KeyGetter = KeyGetterEmpty; + std::vector key_getter_vector; + key_getter_vector.emplace_back(); + + using MapTypeVal = typename KeyGetter::MappedType; + std::vector a_map_type_vector; + a_map_type_vector.emplace_back(); + return joinRightColumnsSwitchNullability( + std::move(key_getter_vector), a_map_type_vector, added_columns, used_flags); + } + throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Unsupported JOIN keys. Type: {}", type); + } +#define M(TYPE) \ + case HashJoin::Type::TYPE: { \ + using MapTypeVal = const typename std::remove_reference_t::element_type; \ + using KeyGetter = typename KeyGetterForType::Type; \ + std::vector a_map_type_vector(mapv.size()); \ + std::vector key_getter_vector; \ + for (size_t d = 0; d < added_columns.join_on_keys.size(); ++d) \ + { \ + const auto & join_on_key = added_columns.join_on_keys[d]; \ + a_map_type_vector[d] = mapv[d]->TYPE.get(); \ + key_getter_vector.push_back( \ + std::move(createKeyGetter(join_on_key.key_columns, join_on_key.key_sizes))); \ + } \ + return joinRightColumnsSwitchNullability(std::move(key_getter_vector), a_map_type_vector, added_columns, used_flags); \ + } + APPLY_FOR_JOIN_VARIANTS(M) +#undef M + + default: + throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Unsupported JOIN keys (type: {})", type); + } +} + +template +template +size_t HashJoinMethods::joinRightColumnsSwitchNullability( + std::vector && key_getter_vector, + const std::vector & mapv, + AddedColumns & added_columns, + JoinStuff::JoinUsedFlags & used_flags) +{ + if (added_columns.need_filter) + { + return joinRightColumnsSwitchMultipleDisjuncts( + std::forward>(key_getter_vector), mapv, added_columns, used_flags); + } + else + { + return joinRightColumnsSwitchMultipleDisjuncts( + std::forward>(key_getter_vector), mapv, added_columns, used_flags); + } +} + +template +template +size_t HashJoinMethods::joinRightColumnsSwitchMultipleDisjuncts( + std::vector && key_getter_vector, + const std::vector & mapv, + AddedColumns & added_columns, + JoinStuff::JoinUsedFlags & used_flags) +{ + constexpr JoinFeatures join_features; + if constexpr (join_features.is_maps_all) + { + if (added_columns.additional_filter_expression) + { + bool mark_per_row_used = join_features.right || join_features.full || mapv.size() > 1; + return joinRightColumnsWithAddtitionalFilter( + std::forward>(key_getter_vector), mapv, added_columns, used_flags, need_filter, mark_per_row_used); + } + } + + if (added_columns.additional_filter_expression) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Additional filter expression is not supported for this JOIN"); + + return mapv.size() > 1 ? joinRightColumns( + std::forward>(key_getter_vector), mapv, added_columns, used_flags) + : joinRightColumns( + std::forward>(key_getter_vector), mapv, added_columns, used_flags); +} + + +/// Joins right table columns which indexes are present in right_indexes using specified map. +/// Makes filter (1 if row presented in right table) and returns offsets to replicate (for ALL JOINS). +template +template +size_t HashJoinMethods::joinRightColumns( + std::vector && key_getter_vector, + const std::vector & mapv, + AddedColumns & added_columns, + JoinStuff::JoinUsedFlags & used_flags) +{ + constexpr JoinFeatures join_features; + + size_t rows = added_columns.rows_to_add; + if constexpr (need_filter) + added_columns.filter = IColumn::Filter(rows, 0); + if constexpr (!flag_per_row && (STRICTNESS == JoinStrictness::All || (STRICTNESS == JoinStrictness::Semi && KIND == JoinKind::Right))) + added_columns.output_by_row_list = true; + + Arena pool; + + if constexpr (join_features.need_replication) + added_columns.offsets_to_replicate = std::make_unique(rows); + + IColumn::Offset current_offset = 0; + size_t max_joined_block_rows = added_columns.max_joined_block_rows; + size_t i = 0; + for (; i < rows; ++i) + { + if constexpr (join_features.need_replication) + { + if (unlikely(current_offset >= max_joined_block_rows)) + { + added_columns.offsets_to_replicate->resize(i); + added_columns.filter.resize(i); + break; + } + } + + bool right_row_found = false; + KnownRowsHolder known_rows; + for (size_t onexpr_idx = 0; onexpr_idx < added_columns.join_on_keys.size(); ++onexpr_idx) + { + const auto & join_keys = added_columns.join_on_keys[onexpr_idx]; + if (join_keys.null_map && (*join_keys.null_map)[i]) + continue; + + bool row_acceptable = !join_keys.isRowFiltered(i); + using FindResult = typename KeyGetter::FindResult; + auto find_result = row_acceptable ? key_getter_vector[onexpr_idx].findKey(*(mapv[onexpr_idx]), i, pool) : FindResult(); + + if (find_result.isFound()) + { + right_row_found = true; + auto & mapped = find_result.getMapped(); + if constexpr (join_features.is_asof_join) + { + const IColumn & left_asof_key = added_columns.leftAsofKey(); + + auto row_ref = mapped->findAsof(left_asof_key, i); + if (row_ref && row_ref->block) + { + setUsed(added_columns.filter, i); + if constexpr (flag_per_row) + used_flags.template setUsed(row_ref->block, row_ref->row_num, 0); + else + used_flags.template setUsed(find_result); + + added_columns.appendFromBlock(row_ref, join_features.add_missing); + } + else + addNotFoundRow(added_columns, current_offset); + } + else if constexpr (join_features.is_all_join) + { + setUsed(added_columns.filter, i); + used_flags.template setUsed(find_result); + auto used_flags_opt = join_features.need_flags ? &used_flags : nullptr; + addFoundRowAll(mapped, added_columns, current_offset, known_rows, used_flags_opt); + } + else if constexpr ((join_features.is_any_join || join_features.is_semi_join) && join_features.right) + { + /// Use first appeared left key + it needs left columns replication + bool used_once = used_flags.template setUsedOnce(find_result); + if (used_once) + { + auto used_flags_opt = join_features.need_flags ? &used_flags : nullptr; + setUsed(added_columns.filter, i); + addFoundRowAll(mapped, added_columns, current_offset, known_rows, used_flags_opt); + } + } + else if constexpr (join_features.is_any_join && join_features.inner) + { + bool used_once = used_flags.template setUsedOnce(find_result); + + /// Use first appeared left key only + if (used_once) + { + setUsed(added_columns.filter, i); + added_columns.appendFromBlock(&mapped, join_features.add_missing); + } + + break; + } + else if constexpr (join_features.is_any_join && join_features.full) + { + /// TODO + } + else if constexpr (join_features.is_anti_join) + { + if constexpr (join_features.right && join_features.need_flags) + used_flags.template setUsed(find_result); + } + else /// ANY LEFT, SEMI LEFT, old ANY (RightAny) + { + setUsed(added_columns.filter, i); + used_flags.template setUsed(find_result); + added_columns.appendFromBlock(&mapped, join_features.add_missing); + + if (join_features.is_any_or_semi_join) + { + break; + } + } + } + } + + if (!right_row_found) + { + if constexpr (join_features.is_anti_join && join_features.left) + setUsed(added_columns.filter, i); + addNotFoundRow(added_columns, current_offset); + } + + if constexpr (join_features.need_replication) + { + (*added_columns.offsets_to_replicate)[i] = current_offset; + } + } + + added_columns.applyLazyDefaults(); + return i; +} + +template +template +void HashJoinMethods::setUsed(IColumn::Filter & filter [[maybe_unused]], size_t pos [[maybe_unused]]) +{ + if constexpr (need_filter) + filter[pos] = 1; +} + +template +template +ColumnPtr HashJoinMethods::buildAdditionalFilter( + size_t left_start_row, + const std::vector & selected_rows, + const std::vector & row_replicate_offset, + AddedColumns & added_columns) +{ + ColumnPtr result_column; + do + { + if (selected_rows.empty()) + { + result_column = ColumnUInt8::create(); + break; + } + const Block & sample_right_block = *((*selected_rows.begin())->block); + if (!sample_right_block || !added_columns.additional_filter_expression) + { + auto filter = ColumnUInt8::create(); + filter->insertMany(1, selected_rows.size()); + result_column = std::move(filter); + break; + } + + auto required_cols = added_columns.additional_filter_expression->getRequiredColumnsWithTypes(); + if (required_cols.empty()) + { + Block block; + added_columns.additional_filter_expression->execute(block); + result_column = block.getByPosition(0).column->cloneResized(selected_rows.size()); + break; + } + NameSet required_column_names; + for (auto & col : required_cols) + required_column_names.insert(col.name); + + Block executed_block; + size_t right_col_pos = 0; + for (const auto & col : sample_right_block.getColumnsWithTypeAndName()) + { + if (required_column_names.contains(col.name)) + { + auto new_col = col.column->cloneEmpty(); + for (const auto & selected_row : selected_rows) + { + const auto & src_col = selected_row->block->getByPosition(right_col_pos); + new_col->insertFrom(*src_col.column, selected_row->row_num); + } + executed_block.insert({std::move(new_col), col.type, col.name}); + } + right_col_pos += 1; + } + if (!executed_block) + { + result_column = ColumnUInt8::create(); + break; + } + + for (const auto & col_name : required_column_names) + { + const auto * src_col = added_columns.left_block.findByName(col_name); + if (!src_col) + continue; + auto new_col = src_col->column->cloneEmpty(); + size_t prev_left_offset = 0; + for (size_t i = 1; i < row_replicate_offset.size(); ++i) + { + const size_t & left_offset = row_replicate_offset[i]; + size_t rows = left_offset - prev_left_offset; + if (rows) + new_col->insertManyFrom(*src_col->column, left_start_row + i - 1, rows); + prev_left_offset = left_offset; + } + executed_block.insert({std::move(new_col), src_col->type, col_name}); + } + if (!executed_block) + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "required columns: [{}], but not found any in left/right table. right table: {}, left table: {}", + required_cols.toString(), + sample_right_block.dumpNames(), + added_columns.left_block.dumpNames()); + } + + for (const auto & col : executed_block.getColumnsWithTypeAndName()) + if (!col.column || !col.type) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Illegal nullptr column in input block: {}", executed_block.dumpStructure()); + + added_columns.additional_filter_expression->execute(executed_block); + result_column = executed_block.getByPosition(0).column->convertToFullColumnIfConst(); + executed_block.clear(); + } while (false); + + result_column = result_column->convertToFullIfNeeded(); + if (result_column->isNullable()) + { + /// Convert Nullable(UInt8) to UInt8 ensuring that nulls are zeros + /// Trying to avoid copying data, since we are the only owner of the column. + ColumnPtr mask_column = assert_cast(*result_column).getNullMapColumnPtr(); + + MutableColumnPtr mutable_column; + { + ColumnPtr nested_column = assert_cast(*result_column).getNestedColumnPtr(); + result_column.reset(); + mutable_column = IColumn::mutate(std::move(nested_column)); + } + + auto & column_data = assert_cast(*mutable_column).getData(); + const auto & mask_column_data = assert_cast(*mask_column).getData(); + for (size_t i = 0; i < column_data.size(); ++i) + { + if (mask_column_data[i]) + column_data[i] = 0; + } + return mutable_column; + } + return result_column; +} + +template +template +size_t HashJoinMethods::joinRightColumnsWithAddtitionalFilter( + std::vector && key_getter_vector, + const std::vector & mapv, + AddedColumns & added_columns, + JoinStuff::JoinUsedFlags & used_flags [[maybe_unused]], + bool need_filter [[maybe_unused]], + bool flag_per_row [[maybe_unused]]) +{ + constexpr JoinFeatures join_features; + size_t left_block_rows = added_columns.rows_to_add; + if (need_filter) + added_columns.filter = IColumn::Filter(left_block_rows, 0); + + std::unique_ptr pool; + + if constexpr (join_features.need_replication) + added_columns.offsets_to_replicate = std::make_unique(left_block_rows); + + std::vector row_replicate_offset; + row_replicate_offset.reserve(left_block_rows); + + using FindResult = typename KeyGetter::FindResult; + size_t max_joined_block_rows = added_columns.max_joined_block_rows; + size_t left_row_iter = 0; + PreSelectedRows selected_rows; + selected_rows.reserve(left_block_rows); + std::vector find_results; + find_results.reserve(left_block_rows); + bool exceeded_max_block_rows = false; + IColumn::Offset total_added_rows = 0; + IColumn::Offset current_added_rows = 0; + + auto collect_keys_matched_rows_refs = [&]() + { + pool = std::make_unique(); + find_results.clear(); + row_replicate_offset.clear(); + row_replicate_offset.push_back(0); + current_added_rows = 0; + selected_rows.clear(); + for (; left_row_iter < left_block_rows; ++left_row_iter) + { + if constexpr (join_features.need_replication) + { + if (unlikely(total_added_rows + current_added_rows >= max_joined_block_rows)) + { + break; + } + } + KnownRowsHolder all_flag_known_rows; + KnownRowsHolder single_flag_know_rows; + for (size_t join_clause_idx = 0; join_clause_idx < added_columns.join_on_keys.size(); ++join_clause_idx) + { + const auto & join_keys = added_columns.join_on_keys[join_clause_idx]; + if (join_keys.null_map && (*join_keys.null_map)[left_row_iter]) + continue; + + bool row_acceptable = !join_keys.isRowFiltered(left_row_iter); + auto find_result = row_acceptable + ? key_getter_vector[join_clause_idx].findKey(*(mapv[join_clause_idx]), left_row_iter, *pool) + : FindResult(); + + if (find_result.isFound()) + { + auto & mapped = find_result.getMapped(); + find_results.push_back(find_result); + /// We don't add missing in addFoundRowAll here. we will add it after filter is applied. + /// it's different from `joinRightColumns`. + if (flag_per_row) + addFoundRowAll(mapped, selected_rows, current_added_rows, all_flag_known_rows, nullptr); + else + addFoundRowAll(mapped, selected_rows, current_added_rows, single_flag_know_rows, nullptr); + } + } + row_replicate_offset.push_back(current_added_rows); + } + }; + + auto copy_final_matched_rows = [&](size_t left_start_row, ColumnPtr filter_col) + { + const PaddedPODArray & filter_flags = assert_cast(*filter_col).getData(); + + size_t prev_replicated_row = 0; + auto selected_right_row_it = selected_rows.begin(); + size_t find_result_index = 0; + for (size_t i = 1, n = row_replicate_offset.size(); i < n; ++i) + { + bool any_matched = false; + /// right/full join or multiple disjuncts, we need to mark used flags for each row. + if (flag_per_row) + { + for (size_t replicated_row = prev_replicated_row; replicated_row < row_replicate_offset[i]; ++replicated_row) + { + if (filter_flags[replicated_row]) + { + if constexpr (join_features.is_semi_join || join_features.is_any_join) + { + /// For LEFT/INNER SEMI/ANY JOIN, we need to add only first appeared row from left, + if constexpr (join_features.left || join_features.inner) + { + if (!any_matched) + { + // For inner join, we need mark each right row'flag, because we only use each right row once. + auto used_once = used_flags.template setUsedOnce( + (*selected_right_row_it)->block, (*selected_right_row_it)->row_num, 0); + if (used_once) + { + any_matched = true; + total_added_rows += 1; + added_columns.appendFromBlock(*selected_right_row_it, join_features.add_missing); + } + } + } + else + { + auto used_once = used_flags.template setUsedOnce( + (*selected_right_row_it)->block, (*selected_right_row_it)->row_num, 0); + if (used_once) + { + any_matched = true; + total_added_rows += 1; + added_columns.appendFromBlock(*selected_right_row_it, join_features.add_missing); + } + } + } + else if constexpr (join_features.is_anti_join) + { + any_matched = true; + if constexpr (join_features.right && join_features.need_flags) + used_flags.template setUsed((*selected_right_row_it)->block, (*selected_right_row_it)->row_num, 0); + } + else + { + any_matched = true; + total_added_rows += 1; + added_columns.appendFromBlock(*selected_right_row_it, join_features.add_missing); + used_flags.template setUsed((*selected_right_row_it)->block, (*selected_right_row_it)->row_num, 0); + } + } + + ++selected_right_row_it; + } + } + else + { + for (size_t replicated_row = prev_replicated_row; replicated_row < row_replicate_offset[i]; ++replicated_row) + { + if constexpr (join_features.is_anti_join) + { + any_matched |= filter_flags[replicated_row]; + } + else if constexpr (join_features.need_replication) + { + if (filter_flags[replicated_row]) + { + any_matched = true; + added_columns.appendFromBlock(*selected_right_row_it, join_features.add_missing); + total_added_rows += 1; + } + ++selected_right_row_it; + } + else + { + if (filter_flags[replicated_row]) + { + any_matched = true; + added_columns.appendFromBlock(*selected_right_row_it, join_features.add_missing); + total_added_rows += 1; + selected_right_row_it = selected_right_row_it + row_replicate_offset[i] - replicated_row; + break; + } + else + ++selected_right_row_it; + } + } + } + + + if constexpr (join_features.is_anti_join) + { + if (!any_matched) + { + if constexpr (join_features.left) + if (need_filter) + setUsed(added_columns.filter, left_start_row + i - 1); + addNotFoundRow(added_columns, total_added_rows); + } + } + else + { + if (!any_matched) + { + addNotFoundRow(added_columns, total_added_rows); + } + else + { + if (!flag_per_row) + used_flags.template setUsed(find_results[find_result_index]); + if (need_filter) + setUsed(added_columns.filter, left_start_row + i - 1); + if constexpr (join_features.add_missing) + added_columns.applyLazyDefaults(); + } + } + find_result_index += (prev_replicated_row != row_replicate_offset[i]); + + if constexpr (join_features.need_replication) + { + (*added_columns.offsets_to_replicate)[left_start_row + i - 1] = total_added_rows; + } + prev_replicated_row = row_replicate_offset[i]; + } + }; + + while (left_row_iter < left_block_rows && !exceeded_max_block_rows) + { + auto left_start_row = left_row_iter; + collect_keys_matched_rows_refs(); + if (selected_rows.size() != current_added_rows || row_replicate_offset.size() != left_row_iter - left_start_row + 1) + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Sizes are mismatched. selected_rows.size:{}, current_added_rows:{}, row_replicate_offset.size:{}, left_row_iter: {}, " + "left_start_row: {}", + selected_rows.size(), + current_added_rows, + row_replicate_offset.size(), + left_row_iter, + left_start_row); + } + auto filter_col = buildAdditionalFilter(left_start_row, selected_rows, row_replicate_offset, added_columns); + copy_final_matched_rows(left_start_row, filter_col); + + if constexpr (join_features.need_replication) + { + // Add a check for current_added_rows to avoid run the filter expression on too small size batch. + if (total_added_rows >= max_joined_block_rows || current_added_rows < 1024) + exceeded_max_block_rows = true; + } + } + + if constexpr (join_features.need_replication) + { + added_columns.offsets_to_replicate->resize_assume_reserved(left_row_iter); + added_columns.filter.resize_assume_reserved(left_row_iter); + } + added_columns.applyLazyDefaults(); + return left_row_iter; +} + +template +Block HashJoinMethods::sliceBlock(Block & block, size_t num_rows) +{ + size_t total_rows = block.rows(); + if (num_rows >= total_rows) + return {}; + size_t remaining_rows = total_rows - num_rows; + Block remaining_block = block.cloneEmpty(); + for (size_t i = 0; i < block.columns(); ++i) + { + auto & col = block.getByPosition(i); + remaining_block.getByPosition(i).column = col.column->cut(num_rows, remaining_rows); + col.column = col.column->cut(0, num_rows); + } + return remaining_block; +} + +template +ColumnWithTypeAndName HashJoinMethods::copyLeftKeyColumnToRight( + const DataTypePtr & right_key_type, + const String & renamed_right_column, + const ColumnWithTypeAndName & left_column, + const IColumn::Filter * null_map_filter) +{ + ColumnWithTypeAndName right_column = left_column; + right_column.name = renamed_right_column; + + if (null_map_filter) + right_column.column = JoinCommon::filterWithBlanks(right_column.column, *null_map_filter); + + bool should_be_nullable = isNullableOrLowCardinalityNullable(right_key_type); + if (null_map_filter) + correctNullabilityInplace(right_column, should_be_nullable, *null_map_filter); + else + correctNullabilityInplace(right_column, should_be_nullable); + + if (!right_column.type->equals(*right_key_type)) + { + right_column.column = castColumnAccurate(right_column, right_key_type); + right_column.type = right_key_type; + } + + right_column.column = right_column.column->convertToFullColumnIfConst(); + return right_column; +} + +template +void HashJoinMethods::correctNullabilityInplace(ColumnWithTypeAndName & column, bool nullable) +{ + if (nullable) + { + JoinCommon::convertColumnToNullable(column); + } + else + { + /// We have to replace values masked by NULLs with defaults. + if (column.column) + if (const auto * nullable_column = checkAndGetColumn(&*column.column)) + column.column = JoinCommon::filterWithBlanks(column.column, nullable_column->getNullMapColumn().getData(), true); + + JoinCommon::removeColumnNullability(column); + } +} + +template +void HashJoinMethods::correctNullabilityInplace( + ColumnWithTypeAndName & column, bool nullable, const IColumn::Filter & negative_null_map) +{ + if (nullable) + { + JoinCommon::convertColumnToNullable(column); + if (column.type->isNullable() && !negative_null_map.empty()) + { + MutableColumnPtr mutable_column = IColumn::mutate(std::move(column.column)); + assert_cast(*mutable_column).applyNegatedNullMap(negative_null_map); + column.column = std::move(mutable_column); + } + } + else + JoinCommon::removeColumnNullability(column); +} +} diff --git a/src/Interpreters/HashJoin/InnerHashJoin.cpp b/src/Interpreters/HashJoin/InnerHashJoin.cpp index 85aedf3a8e5..69f4c620cb8 100644 --- a/src/Interpreters/HashJoin/InnerHashJoin.cpp +++ b/src/Interpreters/HashJoin/InnerHashJoin.cpp @@ -1,10 +1,11 @@ -#include +#include namespace DB { template class HashJoinMethods; template class HashJoinMethods; +template class HashJoinMethods; template class HashJoinMethods; template class HashJoinMethods; template class HashJoinMethods; diff --git a/src/Interpreters/HashJoin/JoinFeatures.h b/src/Interpreters/HashJoin/JoinFeatures.h index 2f2bd1e29a2..b39593e7cac 100644 --- a/src/Interpreters/HashJoin/JoinFeatures.h +++ b/src/Interpreters/HashJoin/JoinFeatures.h @@ -3,26 +3,41 @@ #include namespace DB { -template +template struct JoinFeatures { static constexpr bool is_any_join = STRICTNESS == JoinStrictness::Any; - static constexpr bool is_any_or_semi_join = STRICTNESS == JoinStrictness::Any || STRICTNESS == JoinStrictness::RightAny || (STRICTNESS == JoinStrictness::Semi && KIND == JoinKind::Left); static constexpr bool is_all_join = STRICTNESS == JoinStrictness::All; static constexpr bool is_asof_join = STRICTNESS == JoinStrictness::Asof; static constexpr bool is_semi_join = STRICTNESS == JoinStrictness::Semi; static constexpr bool is_anti_join = STRICTNESS == JoinStrictness::Anti; + static constexpr bool is_any_or_semi_join = is_any_join || STRICTNESS == JoinStrictness::RightAny || (is_semi_join && KIND == JoinKind::Left); static constexpr bool left = KIND == JoinKind::Left; static constexpr bool right = KIND == JoinKind::Right; static constexpr bool inner = KIND == JoinKind::Inner; static constexpr bool full = KIND == JoinKind::Full; + /** Whether we may need duplicate rows from the left table. + * For example, when we have row (key1, attr1) in left table + * and rows (key1, attr2), (key1, attr3) in right table, + * then we need to duplicate row (key1, attr1) for each of joined rows from right table, so result will be + * (key1, attr1, key1, attr2) + * (key1, attr1, key1, attr3) + */ static constexpr bool need_replication = is_all_join || (is_any_join && right) || (is_semi_join && right); + + /// Whether we need to filter rows from the left table that do not have matches in the right table. static constexpr bool need_filter = !need_replication && (inner || right || (is_semi_join && left) || (is_anti_join && left)); + + /// Whether we need to add default values for columns from the left table. static constexpr bool add_missing = (left || full) && !is_semi_join; - static constexpr bool need_flags = MapGetter::flagged; + /// Whether we need to store flags for rows from the right table table + /// that indicates if they have matches in the left table. + static constexpr bool need_flags = MapGetter, HashJoin::MapsAll>>::flagged; + + static constexpr bool is_maps_all = std::is_same_v, HashJoin::MapsAll>; }; } diff --git a/src/Interpreters/HashJoin/JoinUsedFlags.h b/src/Interpreters/HashJoin/JoinUsedFlags.h index bd41ba2073f..c84c6ec3fea 100644 --- a/src/Interpreters/HashJoin/JoinUsedFlags.h +++ b/src/Interpreters/HashJoin/JoinUsedFlags.h @@ -26,10 +26,10 @@ public: /// Update size for vector with flags. /// Calling this method invalidates existing flags. /// It can be called several times, but all of them should happen before using this structure. - template + template void reinit(size_t size) { - if constexpr (MapGetter::flagged) + if constexpr (MapGetter::flagged) { assert(flags[nullptr].size() <= size); need_flags = true; @@ -43,10 +43,10 @@ public: } } - template + template void reinit(const Block * block_ptr) { - if constexpr (MapGetter::flagged) + if constexpr (MapGetter::flagged) { assert(flags[block_ptr].size() <= block_ptr->rows()); need_flags = true; @@ -148,6 +148,31 @@ public: } } + template + bool setUsedOnce(const Block * block, size_t row_num, size_t offset) + { + if constexpr (!use_flags) + return true; + + if constexpr (flag_per_row) + { + /// fast check to prevent heavy CAS with seq_cst order + if (flags[block][row_num].load(std::memory_order_relaxed)) + return false; + + bool expected = false; + return flags[block][row_num].compare_exchange_strong(expected, true); + } + else + { + /// fast check to prevent heavy CAS with seq_cst order + if (flags[nullptr][offset].load(std::memory_order_relaxed)) + return false; + + bool expected = false; + return flags[nullptr][offset].compare_exchange_strong(expected, true); + } + } }; } diff --git a/src/Interpreters/HashJoin/KnowRowsHolder.h b/src/Interpreters/HashJoin/KnowRowsHolder.h index d51c96893c5..9223e98d13c 100644 --- a/src/Interpreters/HashJoin/KnowRowsHolder.h +++ b/src/Interpreters/HashJoin/KnowRowsHolder.h @@ -104,7 +104,7 @@ void addFoundRowAll( { if (!known_rows.isKnown(std::make_pair(it->block, it->row_num))) { - added.appendFromBlock(*it->block, it->row_num, false); + added.appendFromBlock(*it, false); ++current_offset; if (!new_known_rows_ptr) { @@ -124,11 +124,16 @@ void addFoundRowAll( known_rows.add(std::cbegin(*new_known_rows_ptr), std::cend(*new_known_rows_ptr)); } } + else if constexpr (AddedColumns::isLazy()) + { + added.appendFromBlock(&mapped, false); + current_offset += mapped.rows; + } else { for (auto it = mapped.begin(); it.ok(); ++it) { - added.appendFromBlock(*it->block, it->row_num, false); + added.appendFromBlock(*it, false); ++current_offset; } } diff --git a/src/Interpreters/HashJoin/LeftHashJoin.cpp b/src/Interpreters/HashJoin/LeftHashJoin.cpp index 69e17ff70bd..4e06789570e 100644 --- a/src/Interpreters/HashJoin/LeftHashJoin.cpp +++ b/src/Interpreters/HashJoin/LeftHashJoin.cpp @@ -1,11 +1,14 @@ -#include +#include namespace DB { template class HashJoinMethods; template class HashJoinMethods; +template class HashJoinMethods; template class HashJoinMethods; template class HashJoinMethods; +template class HashJoinMethods; template class HashJoinMethods; +template class HashJoinMethods; template class HashJoinMethods; } diff --git a/src/Interpreters/HashJoin/RightHashJoin.cpp b/src/Interpreters/HashJoin/RightHashJoin.cpp index 8e304754f5c..d9d41d7d63c 100644 --- a/src/Interpreters/HashJoin/RightHashJoin.cpp +++ b/src/Interpreters/HashJoin/RightHashJoin.cpp @@ -1,4 +1,4 @@ -#include +#include namespace DB { diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 3f0fe51b0e2..80cb0510b35 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -120,6 +121,7 @@ namespace ErrorCodes extern const int SUPPORT_IS_DISABLED; extern const int TOO_MANY_TABLES; extern const int TOO_MANY_DATABASES; + extern const int THERE_IS_NO_COLUMN; } namespace fs = std::filesystem; @@ -689,7 +691,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( throw Exception(ErrorCodes::LOGICAL_ERROR, "Neither default value expression nor type is provided for a column"); if (col_decl.comment) - column.comment = col_decl.comment->as().value.get(); + column.comment = col_decl.comment->as().value.safeGet(); if (col_decl.codec) { @@ -699,7 +701,6 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( col_decl.codec, column.type, sanity_check_compression_codecs, allow_experimental_codecs, enable_deflate_qpl_codec, enable_zstd_qat_codec); } - column.statistics.column_name = column.name; /// We assign column name here for better exception error message. if (col_decl.statistics_desc) { if (!skip_checks && !context_->getSettingsRef().allow_experimental_statistics) @@ -751,6 +752,10 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti if (create.storage && create.storage->engine) getContext()->checkAccess(AccessType::TABLE_ENGINE, create.storage->engine->name); + /// If this is a TimeSeries table then we need to normalize list of columns (add missing columns and reorder), and also set inner table engines. + if (create.is_time_series_table && (mode < LoadingStrictnessLevel::ATTACH)) + StorageTimeSeries::normalizeTableDefinition(create, getContext()); + TableProperties properties; TableLockHolder as_storage_lock; @@ -782,10 +787,8 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti if (index_desc.type == INVERTED_INDEX_NAME && !settings.allow_experimental_inverted_index) throw Exception(ErrorCodes::ILLEGAL_INDEX, "Please use index type 'full_text' instead of 'inverted'"); /// ---- - if (index_desc.type == "annoy" && !settings.allow_experimental_annoy_index) - throw Exception(ErrorCodes::INCORRECT_QUERY, "Annoy index is disabled. Turn on allow_experimental_annoy_index"); - if (index_desc.type == "usearch" && !settings.allow_experimental_usearch_index) - throw Exception(ErrorCodes::INCORRECT_QUERY, "USearch index is disabled. Turn on allow_experimental_usearch_index"); + if (index_desc.type == "vector_similarity" && !settings.allow_experimental_vector_similarity_index) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Vector similarity index is disabled. Turn on allow_experimental_vector_similarity_index"); properties.indices.push_back(index_desc); } @@ -844,6 +847,7 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti } properties.columns = ColumnsDescription(as_select_sample.getNamesAndTypesList()); + properties.columns_inferred_from_select_query = true; } else if (create.as_table_function) { @@ -933,6 +937,105 @@ void validateVirtualColumns(const IStorage & storage) } } +void InterpreterCreateQuery::validateMaterializedViewColumnsAndEngine(const ASTCreateQuery & create, const TableProperties & properties, const DatabasePtr & database) +{ + /// This is not strict validation, just catches common errors that would make the view not work. + /// It's possible to circumvent these checks by ALTERing the view or target table after creation; + /// we should probably do some of these checks on ALTER as well. + + NamesAndTypesList all_output_columns; + bool check_columns = false; + if (create.hasTargetTableID(ViewTarget::To)) + { + if (StoragePtr to_table = DatabaseCatalog::instance().tryGetTable( + create.getTargetTableID(ViewTarget::To), getContext())) + { + all_output_columns = to_table->getInMemoryMetadataPtr()->getSampleBlock().getNamesAndTypesList(); + check_columns = true; + } + } + else if (!properties.columns_inferred_from_select_query) + { + all_output_columns = properties.columns.getInsertable(); + check_columns = true; + } + + if (create.refresh_strategy && !create.refresh_strategy->append) + { + if (database && database->getEngineName() != "Atomic") + throw Exception(ErrorCodes::INCORRECT_QUERY, + "Refreshable materialized views (except with APPEND) only support Atomic database engine, but database {} has engine {}", create.getDatabase(), database->getEngineName()); + } + + Block input_block; + + if (check_columns) + { + try + { + if (getContext()->getSettingsRef().allow_experimental_analyzer) + { + input_block = InterpreterSelectQueryAnalyzer::getSampleBlock(create.select->clone(), getContext()); + } + else + { + input_block = InterpreterSelectWithUnionQuery(create.select->clone(), + getContext(), + SelectQueryOptions().analyze()).getSampleBlock(); + } + } + catch (Exception &) + { + if (!getContext()->getSettingsRef().allow_materialized_view_with_bad_select) + throw; + check_columns = false; + } + } + + if (check_columns) + { + std::unordered_map output_types; + for (const NameAndTypePair & nt : all_output_columns) + output_types[nt.name] = nt.type; + + ColumnsWithTypeAndName input_columns; + ColumnsWithTypeAndName output_columns; + for (const auto & input_column : input_block) + { + auto it = output_types.find(input_column.name); + if (it != output_types.end()) + { + input_columns.push_back(input_column.cloneEmpty()); + output_columns.push_back(ColumnWithTypeAndName(it->second->createColumn(), it->second, input_column.name)); + } + else if (create.refresh_strategy) + { + /// Unrecognized columns produced by SELECT query are allowed by regular materialized + /// views, but not by refreshable ones. This is in part because it was easier to + /// implement, in part because refreshable views have less concern about ALTERing target + /// tables. + /// + /// The motivating scenario for allowing this in regular MV is ALTERing the table+query. + /// Suppose the user removes a column from target table, then a minute later + /// correspondingly updates the view's query to not produce that column. + /// If MV didn't allow unrecognized columns then during that minute all INSERTs into the + /// source table would fail - unacceptable. + /// For refreshable views, during that minute refreshes will fail - acceptable. + throw Exception(ErrorCodes::THERE_IS_NO_COLUMN, "SELECT query outputs column with name '{}', which is not found in the target table. Use 'AS' to assign alias that matches a column name.", input_column.name); + } + } + + if (input_columns.empty()) + throw Exception(ErrorCodes::THERE_IS_NO_COLUMN, "None of the columns produced by the SELECT query are present in the target table. Use 'AS' to assign aliases that match column names."); + + ActionsDAG::makeConvertingActions( + input_columns, + output_columns, + ActionsDAG::MatchColumnsMode::Position + ); + } +} + namespace { void checkTemporaryTableEngineName(const String & name) @@ -1093,6 +1196,7 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const else if (as_create.storage) { storage_def = typeid_cast>(as_create.storage->ptr()); + create.is_time_series_table = as_create.is_time_series_table; } else { @@ -1128,13 +1232,6 @@ void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const Data throw Exception(ErrorCodes::LOGICAL_ERROR, "Table UUID is not specified in DDL log"); } - if (create.refresh_strategy && database->getEngineName() != "Atomic") - throw Exception(ErrorCodes::INCORRECT_QUERY, - "Refreshable materialized view requires Atomic database engine, but database {} has engine {}", create.getDatabase(), database->getEngineName()); - /// TODO: Support Replicated databases, only with Shared/ReplicatedMergeTree. - /// Figure out how to make the refreshed data appear all at once on other - /// replicas; maybe a replicated SYSTEM SYNC REPLICA query before the rename? - if (database->getUUID() != UUIDHelpers::Nil) { if (create.attach && !from_path && create.uuid == UUIDHelpers::Nil) @@ -1355,51 +1452,16 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) /// Set and retrieve list of columns, indices and constraints. Set table engine if needed. Rewrite query in canonical way. TableProperties properties = getTablePropertiesAndNormalizeCreateQuery(create, mode); - /// Check type compatible for materialized dest table and select columns - if (create.is_materialized_view_with_external_target() && create.select && mode <= LoadingStrictnessLevel::CREATE) - { - if (StoragePtr to_table = DatabaseCatalog::instance().tryGetTable(create.getTargetTableID(ViewTarget::To), getContext())) - { - Block input_block; - - if (getContext()->getSettingsRef().allow_experimental_analyzer) - { - input_block = InterpreterSelectQueryAnalyzer::getSampleBlock(create.select->clone(), getContext()); - } - else - { - input_block = InterpreterSelectWithUnionQuery(create.select->clone(), - getContext(), - SelectQueryOptions().analyze()).getSampleBlock(); - } - - Block output_block = to_table->getInMemoryMetadataPtr()->getSampleBlock(); - - ColumnsWithTypeAndName input_columns; - ColumnsWithTypeAndName output_columns; - for (const auto & input_column : input_block) - { - if (const auto * output_column = output_block.findByName(input_column.name)) - { - input_columns.push_back(input_column.cloneEmpty()); - output_columns.push_back(output_column->cloneEmpty()); - } - } - - ActionsDAG::makeConvertingActions( - input_columns, - output_columns, - ActionsDAG::MatchColumnsMode::Position - ); - } - } - DatabasePtr database; bool need_add_to_database = !create.temporary; // In case of an ON CLUSTER query, the database may not be present on the initiator node if (need_add_to_database) database = DatabaseCatalog::instance().tryGetDatabase(database_name); + /// Check type compatible for materialized dest table and select columns + if (create.select && create.is_materialized_view && mode <= LoadingStrictnessLevel::CREATE) + validateMaterializedViewColumnsAndEngine(create, properties, database); + bool allow_heavy_populate = getContext()->getSettingsRef().database_replicated_allow_heavy_create && create.is_populate; if (!allow_heavy_populate && database && database->getEngineName() == "Replicated" && (create.select || create.is_populate)) { @@ -1910,7 +1972,7 @@ void InterpreterCreateQuery::prepareOnClusterQuery(ASTCreateQuery & create, Cont if (has_explicit_zk_path_arg) { - String zk_path = create.storage->engine->arguments->children[0]->as()->value.get(); + String zk_path = create.storage->engine->arguments->children[0]->as()->value.safeGet(); Macros::MacroExpansionInfo info; info.table_id.uuid = create.uuid; info.ignore_unknown = true; @@ -1940,6 +2002,8 @@ BlockIO InterpreterCreateQuery::execute() FunctionNameNormalizer::visit(query_ptr.get()); auto & create = query_ptr->as(); + create.if_not_exists |= getContext()->getSettingsRef().create_if_not_exists; + bool is_create_database = create.database && !create.table; if (!create.cluster.empty() && !maybeRemoveOnCluster(query_ptr, getContext())) { diff --git a/src/Interpreters/InterpreterCreateQuery.h b/src/Interpreters/InterpreterCreateQuery.h index 3982ea2cabc..5047c372c71 100644 --- a/src/Interpreters/InterpreterCreateQuery.h +++ b/src/Interpreters/InterpreterCreateQuery.h @@ -90,6 +90,7 @@ private: IndicesDescription indices; ConstraintsDescription constraints; ProjectionsDescription projections; + bool columns_inferred_from_select_query = false; }; BlockIO createDatabase(ASTCreateQuery & create); @@ -98,6 +99,7 @@ private: /// Calculate list of columns, constraints, indices, etc... of table. Rewrite query in canonical way. TableProperties getTablePropertiesAndNormalizeCreateQuery(ASTCreateQuery & create, LoadingStrictnessLevel mode) const; void validateTableStructure(const ASTCreateQuery & create, const TableProperties & properties) const; + void validateMaterializedViewColumnsAndEngine(const ASTCreateQuery & create, const TableProperties & properties, const DatabasePtr & database); void setEngine(ASTCreateQuery & create) const; AccessRightsElements getRequiredAccess() const; diff --git a/src/Interpreters/InterpreterDeleteQuery.cpp b/src/Interpreters/InterpreterDeleteQuery.cpp index 291c8e19db0..4827edc6c2a 100644 --- a/src/Interpreters/InterpreterDeleteQuery.cpp +++ b/src/Interpreters/InterpreterDeleteQuery.cpp @@ -17,6 +17,7 @@ #include #include #include +#include namespace DB @@ -27,7 +28,6 @@ namespace ErrorCodes extern const int TABLE_IS_READ_ONLY; extern const int SUPPORT_IS_DISABLED; extern const int BAD_ARGUMENTS; - extern const int NOT_IMPLEMENTED; extern const int QUERY_IS_PROHIBITED; } @@ -67,13 +67,42 @@ BlockIO InterpreterDeleteQuery::execute() auto table_lock = table->lockForShare(getContext()->getCurrentQueryId(), getContext()->getSettingsRef().lock_acquire_timeout); auto metadata_snapshot = table->getInMemoryMetadataPtr(); - auto lightweightDelete = [&]() + if (table->supportsDelete()) + { + /// Convert to MutationCommand + MutationCommands mutation_commands; + MutationCommand mut_command; + + mut_command.type = MutationCommand::Type::DELETE; + mut_command.predicate = delete_query.predicate; + + mutation_commands.emplace_back(mut_command); + + table->checkMutationIsPossible(mutation_commands, getContext()->getSettingsRef()); + MutationsInterpreter::Settings settings(false); + MutationsInterpreter(table, metadata_snapshot, mutation_commands, getContext(), settings).validate(); + table->mutate(mutation_commands, getContext()); + return {}; + } + else if (table->supportsLightweightDelete()) { if (!getContext()->getSettingsRef().enable_lightweight_delete) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Lightweight delete mutate is disabled. " "Set `enable_lightweight_delete` setting to enable it"); + if (metadata_snapshot->hasProjections()) + { + if (const auto * merge_tree_data = dynamic_cast(table.get())) + if (merge_tree_data->getSettings()->lightweight_mutation_projection_mode == LightweightMutationProjectionMode::THROW) + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, + "DELETE query is not allowed for table {} because as it has projections and setting " + "lightweight_mutation_projection_mode is set to THROW. " + "User should change lightweight_mutation_projection_mode OR " + "drop all the projections manually before running the query", + table_id.getFullTableName()); + } + /// Build "ALTER ... UPDATE _row_exists = 0 WHERE predicate" query String alter_query = "ALTER TABLE " + table->getStorageID().getFullTableName() @@ -94,79 +123,9 @@ BlockIO InterpreterDeleteQuery::execute() context->setSetting("mutations_sync", Field(context->getSettingsRef().lightweight_deletes_sync)); InterpreterAlterQuery alter_interpreter(alter_ast, context); return alter_interpreter.execute(); - }; - - if (table->supportsDelete()) - { - /// Convert to MutationCommand - MutationCommands mutation_commands; - MutationCommand mut_command; - - mut_command.type = MutationCommand::Type::DELETE; - mut_command.predicate = delete_query.predicate; - - mutation_commands.emplace_back(mut_command); - - table->checkMutationIsPossible(mutation_commands, getContext()->getSettingsRef()); - MutationsInterpreter::Settings settings(false); - MutationsInterpreter(table, metadata_snapshot, mutation_commands, getContext(), settings).validate(); - table->mutate(mutation_commands, getContext()); - return {}; - } - else if (table->supportsLightweightDelete()) - { - return lightweightDelete(); } else { - if (table->hasProjection()) - { - auto context = Context::createCopy(getContext()); - auto mode = context->getSettingsRef().lightweight_mutation_projection_mode; - if (mode == LightweightMutationProjectionMode::THROW) - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, - "DELETE query is not supported for table {} as it has projections. " - "User should drop all the projections manually before running the query", - table->getStorageID().getFullTableName()); - } - else if (mode == LightweightMutationProjectionMode::DROP) - { - std::vector all_projections = metadata_snapshot->projections.getAllRegisteredNames(); - - context->setSetting("mutations_sync", Field(context->getSettingsRef().lightweight_deletes_sync)); - - /// Drop projections first so that lightweight delete can be performed. - for (const auto & projection : all_projections) - { - String alter_query = - "ALTER TABLE " + table->getStorageID().getFullTableName() - + (delete_query.cluster.empty() ? "" : " ON CLUSTER " + backQuoteIfNeed(delete_query.cluster)) - + " DROP PROJECTION IF EXISTS " + projection; - - ParserAlterQuery parser; - ASTPtr alter_ast = parseQuery( - parser, - alter_query.data(), - alter_query.data() + alter_query.size(), - "ALTER query", - 0, - DBMS_DEFAULT_MAX_PARSER_DEPTH, - DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); - - InterpreterAlterQuery alter_interpreter(alter_ast, context); - alter_interpreter.execute(); - } - } - else - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Unrecognized lightweight_mutation_projection_mode, only throw and drop are allowed."); - } - - return lightweightDelete(); - } - throw Exception(ErrorCodes::BAD_ARGUMENTS, "DELETE query is not supported for table {}", table->getStorageID().getFullTableName()); diff --git a/src/Interpreters/InterpreterDescribeCacheQuery.cpp b/src/Interpreters/InterpreterDescribeCacheQuery.cpp index c7e863bf260..c7464dc6b77 100644 --- a/src/Interpreters/InterpreterDescribeCacheQuery.cpp +++ b/src/Interpreters/InterpreterDescribeCacheQuery.cpp @@ -20,6 +20,7 @@ static Block getSampleBlock() ColumnWithTypeAndName{std::make_shared(), "max_size"}, ColumnWithTypeAndName{std::make_shared(), "max_elements"}, ColumnWithTypeAndName{std::make_shared(), "max_file_segment_size"}, + ColumnWithTypeAndName{std::make_shared(), "is_initialized"}, ColumnWithTypeAndName{std::make_shared(), "boundary_alignment"}, ColumnWithTypeAndName{std::make_shared>(), "cache_on_write_operations"}, ColumnWithTypeAndName{std::make_shared>(), "cache_hits_threshold"}, @@ -50,6 +51,7 @@ BlockIO InterpreterDescribeCacheQuery::execute() res_columns[i++]->insert(settings.max_size); res_columns[i++]->insert(settings.max_elements); res_columns[i++]->insert(settings.max_file_segment_size); + res_columns[i++]->insert(cache->isInitialized()); res_columns[i++]->insert(settings.boundary_alignment); res_columns[i++]->insert(settings.cache_on_write_operations); res_columns[i++]->insert(settings.cache_hits_threshold); diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index bedd9cb4a80..c820f999e0c 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -332,7 +332,7 @@ ExplainSettings checkAndGetSettings(const ASTPtr & ast_settings) if (settings.hasBooleanSetting(change.name)) { - auto value = change.value.get(); + auto value = change.value.safeGet(); if (value > 1) throw Exception(ErrorCodes::INVALID_SETTING_VALUE, "Invalid value {} for setting \"{}\". " "Expected boolean type", value, change.name); @@ -341,7 +341,7 @@ ExplainSettings checkAndGetSettings(const ASTPtr & ast_settings) } else { - auto value = change.value.get(); + auto value = change.value.safeGet(); settings.setIntegerSetting(change.name, value); } } diff --git a/src/Interpreters/InterpreterKillQueryQuery.cpp b/src/Interpreters/InterpreterKillQueryQuery.cpp index 7eb487ba7b3..2c579f3b468 100644 --- a/src/Interpreters/InterpreterKillQueryQuery.cpp +++ b/src/Interpreters/InterpreterKillQueryQuery.cpp @@ -334,7 +334,7 @@ BlockIO InterpreterKillQueryQuery::execute() for (size_t i = 0; i < moves_block.rows(); ++i) { table_id = StorageID{database_col.getDataAt(i).toString(), table_col.getDataAt(i).toString()}; - auto task_uuid = task_uuid_col[i].get(); + auto task_uuid = task_uuid_col[i].safeGet(); CancellationCode code = CancellationCode::Unknown; diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index cb42a8abf9c..0c79f4310ce 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1232,7 +1232,7 @@ SortDescription InterpreterSelectQuery::getSortDescription(const ASTSelectQuery std::shared_ptr collator; if (order_by_elem.getCollation()) - collator = std::make_shared(order_by_elem.getCollation()->as().value.get()); + collator = std::make_shared(order_by_elem.getCollation()->as().value.safeGet()); if (order_by_elem.with_fill) { diff --git a/src/Interpreters/InterpreterShowColumnsQuery.cpp b/src/Interpreters/InterpreterShowColumnsQuery.cpp index d8fff4e6026..472cdedf3ae 100644 --- a/src/Interpreters/InterpreterShowColumnsQuery.cpp +++ b/src/Interpreters/InterpreterShowColumnsQuery.cpp @@ -68,6 +68,7 @@ WITH map( 'Map', 'JSON', 'Tuple', 'JSON', 'Object', 'JSON', + 'JSON', 'JSON', 'String', '{}', 'FixedString', '{}') AS native_to_mysql_mapping, )", diff --git a/src/Interpreters/InterpreterShowCreateQuery.cpp b/src/Interpreters/InterpreterShowCreateQuery.cpp index e5549b2e539..3de6b755609 100644 --- a/src/Interpreters/InterpreterShowCreateQuery.cpp +++ b/src/Interpreters/InterpreterShowCreateQuery.cpp @@ -97,7 +97,12 @@ QueryPipeline InterpreterShowCreateQuery::executeImpl() } MutableColumnPtr column = ColumnString::create(); - column->insert(format({.ctx = getContext(), .query = *create_query, .one_line = false})); + column->insert(format( + { + .ctx = getContext(), + .query = *create_query, + .one_line = false + })); return QueryPipeline(std::make_shared(Block{{ std::move(column), diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index c284acfa308..d4e2f22036c 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -369,9 +369,12 @@ BlockIO InterpreterSystemQuery::execute() system_context->clearMMappedFileCache(); break; case Type::DROP_QUERY_CACHE: + { getContext()->checkAccess(AccessType::SYSTEM_DROP_QUERY_CACHE); - getContext()->clearQueryCache(); + getContext()->clearQueryCache(query.query_cache_tag); break; + } + case Type::DROP_COMPILED_EXPRESSION_CACHE: #if USE_EMBEDDED_COMPILER getContext()->checkAccess(AccessType::SYSTEM_DROP_COMPILED_EXPRESSION_CACHE); @@ -663,13 +666,20 @@ BlockIO InterpreterSystemQuery::execute() startStopAction(ActionLocks::ViewRefresh, false); break; case Type::REFRESH_VIEW: - getRefreshTask()->run(); + for (const auto & task : getRefreshTasks()) + task->run(); + break; + case Type::WAIT_VIEW: + for (const auto & task : getRefreshTasks()) + task->wait(); break; case Type::CANCEL_VIEW: - getRefreshTask()->cancel(); + for (const auto & task : getRefreshTasks()) + task->cancel(); break; case Type::TEST_VIEW: - getRefreshTask()->setFakeTime(query.fake_time_for_view); + for (const auto & task : getRefreshTasks()) + task->setFakeTime(query.fake_time_for_view); break; case Type::DROP_REPLICA: dropReplica(query); @@ -710,14 +720,8 @@ BlockIO InterpreterSystemQuery::execute() case Type::FLUSH_LOGS: { getContext()->checkAccess(AccessType::SYSTEM_FLUSH_LOGS); - - auto logs = getContext()->getSystemLogs(); - std::vector> commands; - commands.reserve(logs.size()); - for (auto * system_log : logs) - commands.emplace_back([system_log] { system_log->flush(true); }); - - executeCommandsAndThrowIfError(commands); + auto system_logs = getContext()->getSystemLogs(); + system_logs.flush(true); break; } case Type::STOP_LISTEN: @@ -1248,15 +1252,15 @@ void InterpreterSystemQuery::flushDistributed(ASTSystemQuery & query) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "SYSTEM RESTART DISK is not supported"); } -RefreshTaskHolder InterpreterSystemQuery::getRefreshTask() +RefreshTaskList InterpreterSystemQuery::getRefreshTasks() { auto ctx = getContext(); ctx->checkAccess(AccessType::SYSTEM_VIEWS); - auto task = ctx->getRefreshSet().getTask(table_id); - if (!task) + auto tasks = ctx->getRefreshSet().findTasks(table_id); + if (tasks.empty()) throw Exception( ErrorCodes::BAD_ARGUMENTS, "Refreshable view {} doesn't exist", table_id.getNameForLogs()); - return task; + return tasks; } @@ -1412,6 +1416,7 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() break; } case Type::REFRESH_VIEW: + case Type::WAIT_VIEW: case Type::START_VIEW: case Type::START_VIEWS: case Type::STOP_VIEW: diff --git a/src/Interpreters/InterpreterSystemQuery.h b/src/Interpreters/InterpreterSystemQuery.h index 776dd7915f0..f44fe930b04 100644 --- a/src/Interpreters/InterpreterSystemQuery.h +++ b/src/Interpreters/InterpreterSystemQuery.h @@ -74,7 +74,7 @@ private: void flushDistributed(ASTSystemQuery & query); [[noreturn]] void restartDisk(String & name); - RefreshTaskHolder getRefreshTask(); + RefreshTaskList getRefreshTasks(); AccessRightsElements getRequiredAccessForDDLOnCluster() const; void startStopAction(StorageActionBlockType action_type, bool start); diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 57ad5caa4c7..0b93b5989b1 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -146,7 +146,6 @@ ColumnDependencies getAllColumnDependencies( bool isStorageTouchedByMutations( - MergeTreeData & storage, MergeTreeData::DataPartPtr source_part, const StorageMetadataPtr & metadata_snapshot, const std::vector & commands, @@ -155,7 +154,9 @@ bool isStorageTouchedByMutations( if (commands.empty()) return false; + auto storage_from_part = std::make_shared(source_part); bool all_commands_can_be_skipped = true; + for (const auto & command : commands) { if (command.type == MutationCommand::APPLY_DELETED_MASK) @@ -170,7 +171,7 @@ bool isStorageTouchedByMutations( if (command.partition) { - const String partition_id = storage.getPartitionIDFromQuery(command.partition, context); + const String partition_id = storage_from_part->getPartitionIDFromQuery(command.partition, context); if (partition_id == source_part->info.partition_id) all_commands_can_be_skipped = false; } @@ -184,20 +185,18 @@ bool isStorageTouchedByMutations( if (all_commands_can_be_skipped) return false; - auto storage_from_part = std::make_shared(source_part); - std::optional interpreter_select_query; BlockIO io; if (context->getSettingsRef().allow_experimental_analyzer) { - auto select_query_tree = prepareQueryAffectedQueryTree(commands, storage.shared_from_this(), context); + auto select_query_tree = prepareQueryAffectedQueryTree(commands, storage_from_part, context); InterpreterSelectQueryAnalyzer interpreter(select_query_tree, context, SelectQueryOptions().ignoreLimits()); io = interpreter.execute(); } else { - ASTPtr select_query = prepareQueryAffectedAST(commands, storage.shared_from_this(), context); + ASTPtr select_query = prepareQueryAffectedAST(commands, storage_from_part, context); /// Interpreter must be alive, when we use result of execute() method. /// For some reason it may copy context and give it into ExpressionTransform /// after that we will use context from destroyed stack frame in our stream. @@ -220,7 +219,7 @@ bool isStorageTouchedByMutations( Block tmp_block; while (executor.pull(tmp_block)); - auto count = (*block.getByName("count()").column)[0].get(); + auto count = (*block.getByName("count()").column)[0].safeGet(); return count != 0; } @@ -501,6 +500,12 @@ static void validateUpdateColumns( throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no column {} in table", backQuote(column_name)); } } + else if (storage_columns.getColumn(GetColumnsOptions::Ordinary, column_name).type->hasDynamicSubcolumns()) + { + throw Exception(ErrorCodes::CANNOT_UPDATE_COLUMN, + "Cannot update column {} with type {}: updates of columns with dynamic subcolumns are not supported", + backQuote(column_name), storage_columns.getColumn(GetColumnsOptions::Ordinary, column_name).type->getName()); + } } } diff --git a/src/Interpreters/MutationsInterpreter.h b/src/Interpreters/MutationsInterpreter.h index 6aaa233cda3..57863e9ae73 100644 --- a/src/Interpreters/MutationsInterpreter.h +++ b/src/Interpreters/MutationsInterpreter.h @@ -19,7 +19,6 @@ using QueryPipelineBuilderPtr = std::unique_ptr; /// Return false if the data isn't going to be changed by mutations. bool isStorageTouchedByMutations( - MergeTreeData & storage, MergeTreeData::DataPartPtr source_part, const StorageMetadataPtr & metadata_snapshot, const std::vector & commands, diff --git a/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp b/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp index dd205ae6508..913f9900b77 100644 --- a/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp +++ b/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp @@ -42,13 +42,13 @@ ASTPtr generateOptimizedDateFilterAST(const String & comparator, const NameAndTy if (isDateOrDate32(column.type.get())) { - start_date_or_date_time = date_lut.dateToString(range.first.get()); - end_date_or_date_time = date_lut.dateToString(range.second.get()); + start_date_or_date_time = date_lut.dateToString(range.first.safeGet()); + end_date_or_date_time = date_lut.dateToString(range.second.safeGet()); } else if (isDateTime(column.type.get()) || isDateTime64(column.type.get())) { - start_date_or_date_time = date_lut.timeToString(range.first.get()); - end_date_or_date_time = date_lut.timeToString(range.second.get()); + start_date_or_date_time = date_lut.timeToString(range.first.safeGet()); + end_date_or_date_time = date_lut.timeToString(range.second.safeGet()); } else [[unlikely]] return {}; diff --git a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp index 48c9988b6fc..e9a663d53b0 100644 --- a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp +++ b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp @@ -24,7 +24,7 @@ static bool tryExtractConstValueFromCondition(const ASTPtr & condition, bool & v if (literal->value.getType() == Field::Types::Int64 || literal->value.getType() == Field::Types::UInt64) { - value = literal->value.get(); + value = literal->value.safeGet(); return true; } if (literal->value.getType() == Field::Types::Null) @@ -51,7 +51,7 @@ static bool tryExtractConstValueFromCondition(const ASTPtr & condition, bool & v { if (type_literal->value.getType() == Field::Types::String) { - const auto & type_str = type_literal->value.get(); + const auto & type_str = type_literal->value.safeGet(); if (type_str == "UInt8" || type_str == "Nullable(UInt8)") return tryExtractConstValueFromCondition(expr_list->children.at(0), value); } diff --git a/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp b/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp index 54515ea072a..86cec8659f5 100644 --- a/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp +++ b/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp @@ -72,7 +72,7 @@ bool shardContains( if (sharding_value.isNull()) return false; - UInt64 value = sharding_value.get(); + UInt64 value = sharding_value.safeGet(); const auto shard_num = data.slots[value % data.slots.size()] + 1; return data.shard_info.shard_num == shard_num; } @@ -120,11 +120,20 @@ void OptimizeShardingKeyRewriteInMatcher::visit(ASTFunction & function, Data & d else if (auto * tuple_literal = right->as(); tuple_literal && tuple_literal->value.getType() == Field::Types::Tuple) { - auto & tuple = tuple_literal->value.get(); - std::erase_if(tuple, [&](auto & child) + auto & tuple = tuple_literal->value.safeGet(); + if (tuple.size() > 1) { - return tuple.size() > 1 && !shardContains(child, name, data); - }); + Tuple new_tuple; + + for (auto & child : tuple) + if (shardContains(child, name, data)) + new_tuple.emplace_back(std::move(child)); + + if (new_tuple.empty()) + new_tuple.emplace_back(std::move(tuple.back())); + + tuple_literal->value = std::move(new_tuple); + } } } @@ -159,7 +168,7 @@ public: { if (isTuple(constant->getResultType())) { - const auto & tuple = constant->getValue().get(); + const auto & tuple = constant->getValue().safeGet(); Tuple new_tuple; new_tuple.reserve(tuple.size()); diff --git a/src/Interpreters/PeriodicLog.cpp b/src/Interpreters/PeriodicLog.cpp index 9d2891e11eb..22bc14856c4 100644 --- a/src/Interpreters/PeriodicLog.cpp +++ b/src/Interpreters/PeriodicLog.cpp @@ -10,7 +10,7 @@ void PeriodicLog::startCollect(size_t collect_interval_milliseconds_ { collect_interval_milliseconds = collect_interval_milliseconds_; is_shutdown_metric_thread = false; - flush_thread = std::make_unique([this] { threadFunction(); }); + collecting_thread = std::make_unique([this] { threadFunction(); }); } template @@ -19,15 +19,15 @@ void PeriodicLog::stopCollect() bool old_val = false; if (!is_shutdown_metric_thread.compare_exchange_strong(old_val, true)) return; - if (flush_thread) - flush_thread->join(); + if (collecting_thread) + collecting_thread->join(); } template void PeriodicLog::shutdown() { stopCollect(); - this->stopFlushThread(); + Base::shutdown(); } template diff --git a/src/Interpreters/PeriodicLog.h b/src/Interpreters/PeriodicLog.h index 08c3f7eb23f..8254a02434a 100644 --- a/src/Interpreters/PeriodicLog.h +++ b/src/Interpreters/PeriodicLog.h @@ -17,6 +17,7 @@ template class PeriodicLog : public SystemLog { using SystemLog::SystemLog; + using Base = SystemLog; public: using TimePoint = std::chrono::system_clock::time_point; @@ -24,18 +25,18 @@ public: /// Launches a background thread to collect metrics with interval void startCollect(size_t collect_interval_milliseconds_); - /// Stop background thread - void stopCollect(); - void shutdown() final; protected: + /// Stop background thread + void stopCollect(); + virtual void stepFunction(TimePoint current_time) = 0; private: void threadFunction(); - std::unique_ptr flush_thread; + std::unique_ptr collecting_thread; size_t collect_interval_milliseconds; std::atomic is_shutdown_metric_thread{false}; }; diff --git a/src/Interpreters/RewriteCountVariantsVisitor.cpp b/src/Interpreters/RewriteCountVariantsVisitor.cpp index 4a541c3765a..272e1ac735f 100644 --- a/src/Interpreters/RewriteCountVariantsVisitor.cpp +++ b/src/Interpreters/RewriteCountVariantsVisitor.cpp @@ -53,7 +53,7 @@ void RewriteCountVariantsVisitor::visit(ASTFunction & func) { if (first_arg_literal->value.getType() == Field::Types::UInt64) { - auto constant = first_arg_literal->value.get(); + auto constant = first_arg_literal->value.safeGet(); if (constant == 1 && !context->getSettingsRef().aggregate_functions_null_for_empty) transform = true; } diff --git a/src/Interpreters/RowRefs.cpp b/src/Interpreters/RowRefs.cpp index 9785ba46dab..1b397ab56ef 100644 --- a/src/Interpreters/RowRefs.cpp +++ b/src/Interpreters/RowRefs.cpp @@ -144,7 +144,7 @@ public: return low; } - RowRef findAsof(const IColumn & asof_column, size_t row_num) override + RowRef * findAsof(const IColumn & asof_column, size_t row_num) override { sort(); @@ -156,10 +156,10 @@ public: if (pos != entries.size()) { size_t row_ref_index = entries[pos].row_ref_index; - return row_refs[row_ref_index]; + return &row_refs[row_ref_index]; } - return {nullptr, 0}; + return nullptr; } private: diff --git a/src/Interpreters/RowRefs.h b/src/Interpreters/RowRefs.h index 650b2311ba7..7c98c47dd11 100644 --- a/src/Interpreters/RowRefs.h +++ b/src/Interpreters/RowRefs.h @@ -122,7 +122,7 @@ struct RowRefList : RowRef }; RowRefList() {} /// NOLINT - RowRefList(const Block * block_, size_t row_num_) : RowRef(block_, row_num_) {} + RowRefList(const Block * block_, size_t row_num_) : RowRef(block_, row_num_), rows(1) {} ForwardIterator begin() const { return ForwardIterator(this); } @@ -135,8 +135,11 @@ struct RowRefList : RowRef *next = Batch(nullptr); } next = next->insert(std::move(row_ref), pool); + ++rows; } +public: + SizeT rows = 0; private: Batch * next = nullptr; }; @@ -158,7 +161,7 @@ struct SortedLookupVectorBase virtual void insert(const IColumn &, const Block *, size_t) = 0; // This needs to be synchronized internally - virtual RowRef findAsof(const IColumn &, size_t) = 0; + virtual RowRef * findAsof(const IColumn &, size_t) = 0; }; diff --git a/src/Interpreters/SessionLog.cpp b/src/Interpreters/SessionLog.cpp index 0615a2a1d62..866f5ba8c0a 100644 --- a/src/Interpreters/SessionLog.cpp +++ b/src/Interpreters/SessionLog.cpp @@ -105,9 +105,10 @@ ColumnsDescription SessionLogElement::getColumnsDescription() {"MySQL", static_cast(Interface::MYSQL)}, {"PostgreSQL", static_cast(Interface::POSTGRESQL)}, {"Local", static_cast(Interface::LOCAL)}, - {"TCP_Interserver", static_cast(Interface::TCP_INTERSERVER)} + {"TCP_Interserver", static_cast(Interface::TCP_INTERSERVER)}, + {"Prometheus", static_cast(Interface::PROMETHEUS)}, }); - static_assert(magic_enum::enum_count() == 7); + static_assert(magic_enum::enum_count() == 8); auto lc_string_datatype = std::make_shared(std::make_shared()); diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index 483112df6cb..95b76c60063 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -5,7 +5,6 @@ #include #include - namespace DB { @@ -125,20 +124,32 @@ Chunk Squashing::squash(std::vector && input_chunks, Chunk::ChunkInfoColl { auto & first_chunk = input_chunks[0]; Columns columns = first_chunk.detachColumns(); + mutable_columns.reserve(columns.size()); for (auto & column : columns) - { mutable_columns.push_back(IColumn::mutate(std::move(column))); - mutable_columns.back()->reserve(rows); - } } + size_t num_columns = mutable_columns.size(); + /// Collect the list of source columns for each column. + std::vector source_columns_list(num_columns, Columns{}); + for (size_t i = 0; i != num_columns; ++i) + source_columns_list[i].reserve(input_chunks.size() - 1); + for (size_t i = 1; i < input_chunks.size(); ++i) // We've already processed the first chunk above { - Columns columns = input_chunks[i].detachColumns(); - for (size_t j = 0, size = mutable_columns.size(); j < size; ++j) + auto columns = input_chunks[i].detachColumns(); + for (size_t j = 0; j != num_columns; ++j) + source_columns_list[j].emplace_back(std::move(columns[j])); + } + + for (size_t i = 0; i != num_columns; ++i) + { + /// We know all the data we will insert in advance and can make all necessary pre-allocations. + mutable_columns[i]->prepareForSquashing(source_columns_list[i]); + for (auto & source_column : source_columns_list[i]) { - const auto source_column = columns[j]; - mutable_columns[j]->insertRangeFrom(*source_column, 0, source_column->size()); + auto column = std::move(source_column); + mutable_columns[i]->insertRangeFrom(*column, 0, column->size()); } } diff --git a/src/Interpreters/StorageID.h b/src/Interpreters/StorageID.h index f9afbc7b98d..ad55d16e284 100644 --- a/src/Interpreters/StorageID.h +++ b/src/Interpreters/StorageID.h @@ -27,7 +27,6 @@ class ASTQueryWithTableAndOutput; class ASTTableIdentifier; class Context; -// TODO(ilezhankin): refactor and merge |ASTTableIdentifier| struct StorageID { String database_name; diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index 572481e6b12..6a3ec197c6e 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include @@ -49,6 +50,7 @@ #include + namespace DB { @@ -282,85 +284,21 @@ ASTPtr getCreateTableQueryClean(const StorageID & table_id, ContextPtr context) SystemLogs::SystemLogs(ContextPtr global_context, const Poco::Util::AbstractConfiguration & config) { - query_log = createSystemLog(global_context, "system", "query_log", config, "query_log", "Contains information about executed queries, for example, start time, duration of processing, error messages."); - query_thread_log = createSystemLog(global_context, "system", "query_thread_log", config, "query_thread_log", "Contains information about threads that execute queries, for example, thread name, thread start time, duration of query processing."); - part_log = createSystemLog(global_context, "system", "part_log", config, "part_log", "This table contains information about events that occurred with data parts in the MergeTree family tables, such as adding or merging data."); - trace_log = createSystemLog(global_context, "system", "trace_log", config, "trace_log", "Contains stack traces collected by the sampling query profiler."); - crash_log = createSystemLog(global_context, "system", "crash_log", config, "crash_log", "Contains information about stack traces for fatal errors. The table does not exist in the database by default, it is created only when fatal errors occur."); - text_log = createSystemLog(global_context, "system", "text_log", config, "text_log", "Contains logging entries which are normally written to a log file or to stdout."); - metric_log = createSystemLog(global_context, "system", "metric_log", config, "metric_log", "Contains history of metrics values from tables system.metrics and system.events, periodically flushed to disk."); - error_log = createSystemLog(global_context, "system", "error_log", config, "error_log", "Contains history of error values from table system.errors, periodically flushed to disk."); - filesystem_cache_log = createSystemLog(global_context, "system", "filesystem_cache_log", config, "filesystem_cache_log", "Contains a history of all events occurred with filesystem cache for objects on a remote filesystem."); - filesystem_read_prefetches_log = createSystemLog( - global_context, "system", "filesystem_read_prefetches_log", config, "filesystem_read_prefetches_log", "Contains a history of all prefetches done during reading from MergeTables backed by a remote filesystem."); - asynchronous_metric_log = createSystemLog( - global_context, "system", "asynchronous_metric_log", config, - "asynchronous_metric_log", "Contains the historical values for system.asynchronous_metrics, once per time interval (one second by default)."); - opentelemetry_span_log = createSystemLog( - global_context, "system", "opentelemetry_span_log", config, - "opentelemetry_span_log", "Contains information about trace spans for executed queries."); - query_views_log = createSystemLog(global_context, "system", "query_views_log", config, "query_views_log", "Contains information about the dependent views executed when running a query, for example, the view type or the execution time."); - zookeeper_log = createSystemLog(global_context, "system", "zookeeper_log", config, "zookeeper_log", "This table contains information about the parameters of the request to the ZooKeeper server and the response from it."); - session_log = createSystemLog(global_context, "system", "session_log", config, "session_log", "Contains information about all successful and failed login and logout events."); - transactions_info_log = createSystemLog( - global_context, "system", "transactions_info_log", config, "transactions_info_log", "Contains information about all transactions executed on a current server."); - processors_profile_log = createSystemLog(global_context, "system", "processors_profile_log", config, "processors_profile_log", "Contains profiling information on processors level (building blocks for a pipeline for query execution."); - asynchronous_insert_log = createSystemLog(global_context, "system", "asynchronous_insert_log", config, "asynchronous_insert_log", "Contains a history for all asynchronous inserts executed on current server."); - backup_log = createSystemLog(global_context, "system", "backup_log", config, "backup_log", "Contains logging entries with the information about BACKUP and RESTORE operations."); - s3_queue_log = createSystemLog(global_context, "system", "s3queue_log", config, "s3queue_log", "Contains logging entries with the information files processes by S3Queue engine."); - azure_queue_log = createSystemLog(global_context, "system", "azure_queue_log", config, "azure_queue_log", "Contains logging entries with the information files processes by S3Queue engine."); - blob_storage_log = createSystemLog(global_context, "system", "blob_storage_log", config, "blob_storage_log", "Contains logging entries with information about various blob storage operations such as uploads and deletes."); +/// NOLINTBEGIN(bugprone-macro-parentheses) +#define CREATE_PUBLIC_MEMBERS(log_type, member, descr) \ + member = createSystemLog(global_context, "system", #member, config, #member, descr); \ + + LIST_OF_ALL_SYSTEM_LOGS(CREATE_PUBLIC_MEMBERS) +#undef CREATE_PUBLIC_MEMBERS +/// NOLINTEND(bugprone-macro-parentheses) - if (query_log) - logs.emplace_back(query_log.get()); - if (query_thread_log) - logs.emplace_back(query_thread_log.get()); - if (part_log) - logs.emplace_back(part_log.get()); - if (trace_log) - logs.emplace_back(trace_log.get()); - if (crash_log) - logs.emplace_back(crash_log.get()); - if (text_log) - logs.emplace_back(text_log.get()); - if (metric_log) - logs.emplace_back(metric_log.get()); - if (error_log) - logs.emplace_back(error_log.get()); - if (asynchronous_metric_log) - logs.emplace_back(asynchronous_metric_log.get()); - if (opentelemetry_span_log) - logs.emplace_back(opentelemetry_span_log.get()); - if (query_views_log) - logs.emplace_back(query_views_log.get()); - if (zookeeper_log) - logs.emplace_back(zookeeper_log.get()); if (session_log) - { - logs.emplace_back(session_log.get()); global_context->addWarningMessage("Table system.session_log is enabled. It's unreliable and may contain garbage. Do not use it for any kind of security monitoring."); - } - if (transactions_info_log) - logs.emplace_back(transactions_info_log.get()); - if (processors_profile_log) - logs.emplace_back(processors_profile_log.get()); - if (filesystem_cache_log) - logs.emplace_back(filesystem_cache_log.get()); - if (filesystem_read_prefetches_log) - logs.emplace_back(filesystem_read_prefetches_log.get()); - if (asynchronous_insert_log) - logs.emplace_back(asynchronous_insert_log.get()); - if (backup_log) - logs.emplace_back(backup_log.get()); - if (s3_queue_log) - logs.emplace_back(s3_queue_log.get()); - if (blob_storage_log) - logs.emplace_back(blob_storage_log.get()); bool should_prepare = global_context->getServerSettings().prepare_system_log_tables_on_startup; try { - for (auto & log : logs) + for (auto & log : getAllLogs()) { log->startup(); if (should_prepare) @@ -394,20 +332,54 @@ SystemLogs::SystemLogs(ContextPtr global_context, const Poco::Util::AbstractConf } } - -SystemLogs::~SystemLogs() +std::vector SystemLogs::getAllLogs() const { +#define GET_RAW_POINTERS(log_type, member, descr) \ + (member).get(), \ + + std::vector result = { + LIST_OF_ALL_SYSTEM_LOGS(GET_RAW_POINTERS) + }; +#undef GET_RAW_POINTERS + + auto last_it = std::remove(result.begin(), result.end(), nullptr); + result.erase(last_it, result.end()); + + return result; +} + +void SystemLogs::flush(bool should_prepare_tables_anyway) +{ + auto logs = getAllLogs(); + std::vector logs_indexes(logs.size(), 0); + + for (size_t i = 0; i < logs.size(); ++i) + { + auto last_log_index = logs[i]->getLastLogIndex(); + logs_indexes[i] = last_log_index; + logs[i]->notifyFlush(last_log_index, should_prepare_tables_anyway); + } + + for (size_t i = 0; i < logs.size(); ++i) + logs[i]->flush(logs_indexes[i], should_prepare_tables_anyway); +} + +void SystemLogs::flushAndShutdown() +{ + flush(/* should_prepare_tables_anyway */ false); shutdown(); } void SystemLogs::shutdown() { + auto logs = getAllLogs(); for (auto & log : logs) log->shutdown(); } void SystemLogs::handleCrash() { + auto logs = getAllLogs(); for (auto & log : logs) log->handleCrash(); } @@ -430,65 +402,39 @@ SystemLog::SystemLog( template void SystemLog::shutdown() { - stopFlushThread(); + Base::stopFlushThread(); auto table = DatabaseCatalog::instance().tryGetTable(table_id, getContext()); if (table) table->flushAndShutdown(); } -template -void SystemLog::stopFlushThread() -{ - { - std::lock_guard lock(thread_mutex); - - if (!saving_thread || !saving_thread->joinable()) - return; - - if (is_shutdown) - return; - - is_shutdown = true; - queue->shutdown(); - } - - saving_thread->join(); -} - template void SystemLog::savingThreadFunction() { setThreadName("SystemLogFlush"); - std::vector to_flush; - bool exit_this_thread = false; - while (!exit_this_thread) + while (true) { try { - // The end index (exclusive, like std end()) of the messages we are - // going to flush. - uint64_t to_flush_end = 0; - // Should we prepare table even if there are no new messages. - bool should_prepare_tables_anyway = false; + auto result = queue->pop(); - to_flush_end = queue->pop(to_flush, should_prepare_tables_anyway, exit_this_thread); - - if (to_flush.empty()) + if (result.is_shutdown) { - if (should_prepare_tables_anyway) - { - prepareTable(); - LOG_TRACE(log, "Table created (force)"); - - queue->confirm(to_flush_end); - } + LOG_TRACE(log, "Terminating"); + return; } - else + + if (!result.logs.empty()) { - flushImpl(to_flush, to_flush_end); + flushImpl(result.logs, result.last_log_index); + } + else if (result.create_table_force) + { + prepareTable(); + queue->confirm(result.last_log_index); } } catch (...) @@ -496,7 +442,6 @@ void SystemLog::savingThreadFunction() tryLogCurrentException(__PRETTY_FUNCTION__); } } - LOG_TRACE(log, "Terminating"); } diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index 0ac468b15ec..c03f9370068 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -5,6 +5,32 @@ #include #include +#include + +#define LIST_OF_ALL_SYSTEM_LOGS(M) \ + M(QueryLog, query_log, "Contains information about executed queries, for example, start time, duration of processing, error messages.") \ + M(QueryThreadLog, query_thread_log, "Contains information about threads that execute queries, for example, thread name, thread start time, duration of query processing.") \ + M(PartLog, part_log, "This table contains information about events that occurred with data parts in the MergeTree family tables, such as adding or merging data.") \ + M(TraceLog, trace_log, "Contains stack traces collected by the sampling query profiler.") \ + M(CrashLog, crash_log, "Contains information about stack traces for fatal errors. The table does not exist in the database by default, it is created only when fatal errors occur.") \ + M(TextLog, text_log, "Contains logging entries which are normally written to a log file or to stdout.") \ + M(MetricLog, metric_log, "Contains history of metrics values from tables system.metrics and system.events, periodically flushed to disk.") \ + M(ErrorLog, error_log, "Contains history of error values from table system.errors, periodically flushed to disk.") \ + M(FilesystemCacheLog, filesystem_cache_log, "Contains a history of all events occurred with filesystem cache for objects on a remote filesystem.") \ + M(FilesystemReadPrefetchesLog, filesystem_read_prefetches_log, "Contains a history of all prefetches done during reading from MergeTables backed by a remote filesystem.") \ + M(ObjectStorageQueueLog, s3queue_log, "Contains logging entries with the information files processes by S3Queue engine.") \ + M(ObjectStorageQueueLog, azure_queue_log, "Contains logging entries with the information files processes by S3Queue engine.") \ + M(AsynchronousMetricLog, asynchronous_metric_log, "Contains the historical values for system.asynchronous_metrics, once per time interval (one second by default).") \ + M(OpenTelemetrySpanLog, opentelemetry_span_log, "Contains information about trace spans for executed queries.") \ + M(QueryViewsLog, query_views_log, "Contains information about the dependent views executed when running a query, for example, the view type or the execution time.") \ + M(ZooKeeperLog, zookeeper_log, "This table contains information about the parameters of the request to the ZooKeeper server and the response from it.") \ + M(SessionLog, session_log, "Contains information about all successful and failed login and logout events.") \ + M(TransactionsInfoLog, transactions_info_log, "Contains information about all transactions executed on a current server.") \ + M(ProcessorsProfileLog, processors_profile_log, "Contains profiling information on processors level (building blocks for a pipeline for query execution.") \ + M(AsynchronousInsertLog, asynchronous_insert_log, "Contains a history for all asynchronous inserts executed on current server.") \ + M(BackupLog, backup_log, "Contains logging entries with the information about BACKUP and RESTORE operations.") \ + M(BlobStorageLog, blob_storage_log, "Contains logging entries with information about various blob storage operations such as uploads and deletes.") \ + namespace DB { @@ -34,71 +60,37 @@ namespace DB }; */ -class QueryLog; -class QueryThreadLog; -class PartLog; -class TextLog; -class TraceLog; -class CrashLog; -class ErrorLog; -class MetricLog; -class AsynchronousMetricLog; -class OpenTelemetrySpanLog; -class QueryViewsLog; -class ZooKeeperLog; -class SessionLog; -class TransactionsInfoLog; -class ProcessorsProfileLog; -class FilesystemCacheLog; -class FilesystemReadPrefetchesLog; -class AsynchronousInsertLog; -class BackupLog; -class ObjectStorageQueueLog; -class BlobStorageLog; +/// NOLINTBEGIN(bugprone-macro-parentheses) +#define FORWARD_DECLARATION(log_type, member, descr) \ + class log_type; \ + +LIST_OF_ALL_SYSTEM_LOGS(FORWARD_DECLARATION) +#undef FORWARD_DECLARATION +/// NOLINTEND(bugprone-macro-parentheses) + /// System logs should be destroyed in destructor of the last Context and before tables, /// because SystemLog destruction makes insert query while flushing data into underlying tables -struct SystemLogs +class SystemLogs { +public: + SystemLogs() = default; SystemLogs(ContextPtr global_context, const Poco::Util::AbstractConfiguration & config); - ~SystemLogs(); + SystemLogs(const SystemLogs & other) = default; + void flush(bool should_prepare_tables_anyway); + void flushAndShutdown(); void shutdown(); void handleCrash(); - std::shared_ptr query_log; /// Used to log queries. - std::shared_ptr query_thread_log; /// Used to log query threads. - std::shared_ptr part_log; /// Used to log operations with parts - std::shared_ptr trace_log; /// Used to log traces from query profiler - std::shared_ptr crash_log; /// Used to log server crashes. - std::shared_ptr text_log; /// Used to log all text messages. - std::shared_ptr metric_log; /// Used to log all metrics. - std::shared_ptr error_log; /// Used to log errors. - std::shared_ptr filesystem_cache_log; - std::shared_ptr filesystem_read_prefetches_log; - std::shared_ptr s3_queue_log; - std::shared_ptr azure_queue_log; - /// Metrics from system.asynchronous_metrics. - std::shared_ptr asynchronous_metric_log; - /// OpenTelemetry trace spans. - std::shared_ptr opentelemetry_span_log; - /// Used to log queries of materialized and live views - std::shared_ptr query_views_log; - /// Used to log all actions of ZooKeeper client - std::shared_ptr zookeeper_log; - /// Login, LogOut and Login failure events - std::shared_ptr session_log; - /// Events related to transactions - std::shared_ptr transactions_info_log; - /// Used to log processors profiling - std::shared_ptr processors_profile_log; - std::shared_ptr asynchronous_insert_log; - /// Backup and restore events - std::shared_ptr backup_log; - /// Log blob storage operations - std::shared_ptr blob_storage_log; +#define DECLARE_PUBLIC_MEMBERS(log_type, member, descr) \ + std::shared_ptr member; \ - std::vector logs; + LIST_OF_ALL_SYSTEM_LOGS(DECLARE_PUBLIC_MEMBERS) +#undef DECLARE_PUBLIC_MEMBERS + +private: + std::vector getAllLogs() const; }; struct SystemLogSettings @@ -133,8 +125,6 @@ public: void shutdown() override; - void stopFlushThread() override; - /** Creates new table if it does not exist. * Renames old table if its structure is not suitable. * This cannot be done in constructor to avoid deadlock while renaming a table under locked Context when SystemLog object is created. @@ -144,9 +134,6 @@ public: protected: LoggerPtr log; - using ISystemLog::is_shutdown; - using ISystemLog::saving_thread; - using ISystemLog::thread_mutex; using Base::queue; StoragePtr getStorage() const; diff --git a/src/Interpreters/TableJoin.cpp b/src/Interpreters/TableJoin.cpp index c8c926db13c..138085f0710 100644 --- a/src/Interpreters/TableJoin.cpp +++ b/src/Interpreters/TableJoin.cpp @@ -115,6 +115,7 @@ TableJoin::TableJoin(const Settings & settings, VolumePtr tmp_volume_, Temporary , partial_merge_join_left_table_buffer_bytes(settings.partial_merge_join_left_table_buffer_bytes) , max_files_to_merge(settings.join_on_disk_max_files_to_merge) , temporary_files_codec(settings.temporary_files_codec) + , output_by_rowlist_perkey_rows_threshold(settings.join_output_by_rowlist_perkey_rows_threshold) , max_memory_usage(settings.max_memory_usage) , tmp_volume(tmp_volume_) , tmp_data(tmp_data_) diff --git a/src/Interpreters/TableJoin.h b/src/Interpreters/TableJoin.h index 3f2bebb5816..4d626084d81 100644 --- a/src/Interpreters/TableJoin.h +++ b/src/Interpreters/TableJoin.h @@ -148,6 +148,7 @@ private: const size_t partial_merge_join_left_table_buffer_bytes = 0; const size_t max_files_to_merge = 0; const String temporary_files_codec = "LZ4"; + const size_t output_by_rowlist_perkey_rows_threshold = 0; /// Value if setting max_memory_usage for query, can be used when max_bytes_in_join is not specified. size_t max_memory_usage = 0; @@ -295,6 +296,7 @@ public: return join_use_nulls && isRightOrFull(kind()); } + size_t outputByRowListPerkeyRowsThreshold() const { return output_by_rowlist_perkey_rows_threshold; } size_t defaultMaxBytes() const { return default_max_bytes; } size_t maxJoinedBlockRows() const { return max_joined_block_rows; } size_t maxRowsInRightBlock() const { return partial_merge_join_rows_in_right_blocks; } diff --git a/src/Interpreters/TemporaryDataOnDisk.cpp b/src/Interpreters/TemporaryDataOnDisk.cpp index 7f0fb8cd6ca..3259d7b67d6 100644 --- a/src/Interpreters/TemporaryDataOnDisk.cpp +++ b/src/Interpreters/TemporaryDataOnDisk.cpp @@ -65,7 +65,7 @@ TemporaryDataOnDisk::TemporaryDataOnDisk(TemporaryDataOnDiskScopePtr parent_, Cu std::unique_ptr TemporaryDataOnDisk::createRawStream(size_t max_file_size) { - if (file_cache) + if (file_cache && file_cache->isInitialized()) { auto holder = createCacheFile(max_file_size); return std::make_unique(std::move(holder)); @@ -81,7 +81,7 @@ std::unique_ptr TemporaryDataOnDisk::createRawStream(si TemporaryFileStream & TemporaryDataOnDisk::createStream(const Block & header, size_t max_file_size) { - if (file_cache) + if (file_cache && file_cache->isInitialized()) { auto holder = createCacheFile(max_file_size); diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index 1f7c6b1fe68..ca8f8d235fa 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -473,11 +473,11 @@ void ThreadStatus::initGlobalProfiler([[maybe_unused]] UInt64 global_profiler_re { if (global_profiler_real_time_period > 0) query_profiler_real = std::make_unique(thread_id, - /* period= */ static_cast(global_profiler_real_time_period)); + /* period= */ global_profiler_real_time_period); if (global_profiler_cpu_time_period > 0) query_profiler_cpu = std::make_unique(thread_id, - /* period= */ static_cast(global_profiler_cpu_time_period)); + /* period= */ global_profiler_cpu_time_period); } catch (...) { @@ -506,18 +506,18 @@ void ThreadStatus::initQueryProfiler() { if (!query_profiler_real) query_profiler_real = std::make_unique(thread_id, - /* period= */ static_cast(settings.query_profiler_real_time_period_ns)); + /* period= */ settings.query_profiler_real_time_period_ns); else - query_profiler_real->setPeriod(static_cast(settings.query_profiler_real_time_period_ns)); + query_profiler_real->setPeriod(settings.query_profiler_real_time_period_ns); } if (settings.query_profiler_cpu_time_period_ns > 0) { if (!query_profiler_cpu) query_profiler_cpu = std::make_unique(thread_id, - /* period= */ static_cast(settings.query_profiler_cpu_time_period_ns)); + /* period= */ settings.query_profiler_cpu_time_period_ns); else - query_profiler_cpu->setPeriod(static_cast(settings.query_profiler_cpu_time_period_ns)); + query_profiler_cpu->setPeriod(settings.query_profiler_cpu_time_period_ns); } } catch (...) diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index b872eb94fde..6483dd3be48 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -184,7 +184,7 @@ void optimizeGroupBy(ASTSelectQuery * select_query, ContextPtr context) const auto & value = group_exprs[i]->as()->value; if (value.getType() == Field::Types::UInt64) { - auto pos = value.get(); + auto pos = value.safeGet(); if (pos > 0 && pos <= select_query->select()->children.size()) keep_position = true; } diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 14094c3cccf..f31522ae649 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -47,10 +47,10 @@ #include #include -#include -#include -#include #include +#include +#include +#include #include #include @@ -1173,9 +1173,9 @@ bool TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select if (object_pos != std::string::npos) { String object_name = it->substr(0, object_pos); - if (pair.name == object_name && pair.type->getTypeId() == TypeIndex::Object) + if (pair.name == object_name && pair.type->getTypeId() == TypeIndex::ObjectDeprecated) { - const auto * object_type = typeid_cast(pair.type.get()); + const auto * object_type = typeid_cast(pair.type.get()); if (object_type->getSchemaFormat() == "json" && object_type->hasNullableSubcolumns()) { missed_subcolumns.insert(*it); diff --git a/src/Interpreters/WindowDescription.cpp b/src/Interpreters/WindowDescription.cpp index 31a881001e3..b1e12ff8048 100644 --- a/src/Interpreters/WindowDescription.cpp +++ b/src/Interpreters/WindowDescription.cpp @@ -94,8 +94,8 @@ void WindowFrame::checkValid() const if (begin_type == BoundaryType::Offset && !((begin_offset.getType() == Field::Types::UInt64 || begin_offset.getType() == Field::Types::Int64) - && begin_offset.get() >= 0 - && begin_offset.get() < INT_MAX)) + && begin_offset.safeGet() >= 0 + && begin_offset.safeGet() < INT_MAX)) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Frame start offset for '{}' frame must be a nonnegative 32-bit integer, '{}' of type '{}' given", @@ -107,8 +107,8 @@ void WindowFrame::checkValid() const if (end_type == BoundaryType::Offset && !((end_offset.getType() == Field::Types::UInt64 || end_offset.getType() == Field::Types::Int64) - && end_offset.get() >= 0 - && end_offset.get() < INT_MAX)) + && end_offset.safeGet() >= 0 + && end_offset.safeGet() < INT_MAX)) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Frame end offset for '{}' frame must be a nonnegative 32-bit integer, '{}' of type '{}' given", diff --git a/src/Interpreters/castColumn.cpp b/src/Interpreters/castColumn.cpp index 906dfb84b14..a779c9bc34d 100644 --- a/src/Interpreters/castColumn.cpp +++ b/src/Interpreters/castColumn.cpp @@ -26,11 +26,9 @@ static ColumnPtr castColumn(CastType cast_type, const ColumnWithTypeAndName & ar "" } }; - auto get_cast_func = [cast_type, &arguments] + auto get_cast_func = [from = arg, to = type, cast_type] { - - FunctionOverloadResolverPtr func_builder_cast = createInternalCastOverloadResolver(cast_type, {}); - return func_builder_cast->build(arguments); + return createInternalCast(from, to, cast_type, {}); }; FunctionBasePtr func_cast = cache ? cache->getOrSet(cast_type, from_name, to_name, std::move(get_cast_func)) : get_cast_func(); diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index 1a7c166c6a5..7e1b4e2fb0e 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -57,7 +57,7 @@ template Field convertNumericTypeImpl(const Field & from) { To result; - if (!accurate::convertNumeric(from.get(), result)) + if (!accurate::convertNumeric(from.safeGet(), result)) return {}; return result; } @@ -88,7 +88,7 @@ Field convertNumericType(const Field & from, const IDataType & type) template Field convertIntToDecimalType(const Field & from, const DataTypeDecimal & type) { - From value = from.get(); + From value = from.safeGet(); if (!type.canStoreWhole(value)) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Number is too big to place in {}", type.getName()); @@ -100,7 +100,7 @@ Field convertIntToDecimalType(const Field & from, const DataTypeDecimal & typ template Field convertStringToDecimalType(const Field & from, const DataTypeDecimal & type) { - const String & str_value = from.get(); + const String & str_value = from.safeGet(); T value = type.parseFromString(str_value); return DecimalField(value, type.getScale()); } @@ -108,7 +108,7 @@ Field convertStringToDecimalType(const Field & from, const DataTypeDecimal & template Field convertDecimalToDecimalType(const Field & from, const DataTypeDecimal & type) { - auto field = from.get>(); + auto field = from.safeGet>(); T value = convertDecimals, DataTypeDecimal>(field.getValue(), field.getScale(), type.getScale()); return DecimalField(value, type.getScale()); } @@ -116,7 +116,7 @@ Field convertDecimalToDecimalType(const Field & from, const DataTypeDecimal & template Field convertFloatToDecimalType(const Field & from, const DataTypeDecimal & type) { - From value = from.get(); + From value = from.safeGet(); if (!type.canStoreWhole(value)) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Number is too big to place in {}", type.getName()); @@ -182,24 +182,24 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID /// Conversion between Date and DateTime and vice versa. if (which_type.isDate() && which_from_type.isDateTime()) { - return static_cast(static_cast(*from_type_hint).getTimeZone().toDayNum(src.get()).toUnderType()); + return static_cast(static_cast(*from_type_hint).getTimeZone().toDayNum(src.safeGet()).toUnderType()); } else if (which_type.isDate32() && which_from_type.isDateTime()) { - return static_cast(static_cast(*from_type_hint).getTimeZone().toDayNum(src.get()).toUnderType()); + return static_cast(static_cast(*from_type_hint).getTimeZone().toDayNum(src.safeGet()).toUnderType()); } else if (which_type.isDateTime() && which_from_type.isDate()) { - return static_cast(type).getTimeZone().fromDayNum(DayNum(src.get())); + return static_cast(type).getTimeZone().fromDayNum(DayNum(src.safeGet())); } else if (which_type.isDateTime() && which_from_type.isDate32()) { - return static_cast(type).getTimeZone().fromDayNum(DayNum(src.get())); + return static_cast(type).getTimeZone().fromDayNum(DayNum(src.safeGet())); } else if (which_type.isDateTime64() && which_from_type.isDate()) { const auto & date_time64_type = static_cast(type); - const auto value = date_time64_type.getTimeZone().fromDayNum(DayNum(src.get())); + const auto value = date_time64_type.getTimeZone().fromDayNum(DayNum(src.safeGet())); return DecimalField( DecimalUtils::decimalFromComponentsWithMultiplier(value, 0, date_time64_type.getScaleMultiplier()), date_time64_type.getScale()); @@ -207,13 +207,17 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID else if (which_type.isDateTime64() && which_from_type.isDate32()) { const auto & date_time64_type = static_cast(type); - const auto value = date_time64_type.getTimeZone().fromDayNum(ExtendedDayNum(static_cast(src.get()))); + const auto value = date_time64_type.getTimeZone().fromDayNum(ExtendedDayNum(static_cast(src.safeGet()))); return DecimalField( DecimalUtils::decimalFromComponentsWithMultiplier(value, 0, date_time64_type.getScaleMultiplier()), date_time64_type.getScale()); } else if (type.isValueRepresentedByNumber() && src.getType() != Field::Types::String) { + /// Bool is not represented in which_type, so we need to type it separately + if (isInt64OrUInt64orBoolFieldType(src.getType()) && type.getName() == "Bool") + return bool(src.safeGet()); + if (which_type.isUInt8()) return convertNumericType(src, type); if (which_type.isUInt16()) return convertNumericType(src, type); if (which_type.isUInt32()) return convertNumericType(src, type); @@ -253,7 +257,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID if (which_type.isDateTime64() && src.getType() == Field::Types::Decimal64) { - const auto & from_type = src.get(); + const auto & from_type = src.safeGet(); const auto & to_type = static_cast(type); const auto scale_from = from_type.getScale(); @@ -300,8 +304,8 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID } if (which_type.isIPv4() && src.getType() == Field::Types::UInt64) { - /// convert to UInt32 which is the underlying type for native IPv4 - return convertNumericType(src, type); + /// convert through UInt32 which is the underlying type for native IPv4 + return static_cast(convertNumericType(src, type).safeGet()); } } else if (which_type.isUUID() && src.getType() == Field::Types::UUID) @@ -318,7 +322,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID if (which_from_type.isFixedString() && assert_cast(from_type_hint)->getN() == IPV6_BINARY_LENGTH) { const auto col = type.createColumn(); - ReadBufferFromString in_buffer(src.get()); + ReadBufferFromString in_buffer(src.safeGet()); type.getDefaultSerialization()->deserializeBinary(*col, in_buffer, {}); return (*col)[0]; } @@ -330,7 +334,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID if (which_type.isFixedString()) { size_t n = assert_cast(type).getN(); - const auto & src_str = src.get(); + const auto & src_str = src.safeGet(); if (src_str.size() < n) { String src_str_extended = src_str; @@ -347,7 +351,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID { if (src.getType() == Field::Types::Array) { - const Array & src_arr = src.get(); + const Array & src_arr = src.safeGet(); size_t src_arr_size = src_arr.size(); const auto & element_type = *(type_array->getNestedType()); @@ -370,7 +374,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID { if (src.getType() == Field::Types::Tuple) { - const auto & src_tuple = src.get(); + const auto & src_tuple = src.safeGet(); size_t src_tuple_size = src_tuple.size(); size_t dst_tuple_size = type_tuple->getElements().size(); @@ -415,7 +419,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID const auto & key_type = *type_map->getKeyType(); const auto & value_type = *type_map->getValueType(); - const auto & map = src.get(); + const auto & map = src.safeGet(); size_t map_size = map.size(); Map res(map_size); @@ -424,7 +428,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID for (size_t i = 0; i < map_size; ++i) { - const auto & map_entry = map[i].get(); + const auto & map_entry = map[i].safeGet(); const auto & key = map_entry[0]; const auto & value = map_entry[1]; @@ -453,13 +457,13 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID "Cannot convert {} to {}", src.getTypeName(), agg_func_type->getName()); - const auto & name = src.get().name; + const auto & name = src.safeGet().name; if (agg_func_type->getName() != name) throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot convert {} to {}", name, agg_func_type->getName()); return src; } - else if (isObject(type)) + else if (isObjectDeprecated(type)) { if (src.getType() == Field::Types::Object) return src; /// Already in needed type. @@ -468,7 +472,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID if (src.getType() == Field::Types::Tuple && from_type_tuple && from_type_tuple->haveExplicitNames()) { const auto & names = from_type_tuple->getElementNames(); - const auto & tuple = src.get(); + const auto & tuple = src.safeGet(); if (names.size() != tuple.size()) throw Exception(ErrorCodes::TYPE_MISMATCH, @@ -485,10 +489,10 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID if (src.getType() == Field::Types::Map) { Object object; - const auto & map = src.get(); + const auto & map = src.safeGet(); for (const auto & element : map) { - const auto & map_entry = element.get(); + const auto & map_entry = element.safeGet(); const auto & key = map_entry[0]; const auto & value = map_entry[1]; @@ -496,7 +500,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot convert from Map with key of type {} to Object", key.getTypeName()); - object[key.get()] = value; + object[key.safeGet()] = value; } return object; @@ -519,6 +523,13 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID /// We can insert any field to Dynamic column. return src; } + else if (isObject(type)) + { + if (src.getType() == Field::Types::Object) + return src; /// Already in needed type. + + /// TODO: add conversion from Map/Tuple to Object. + } /// Conversion from string by parsing. if (src.getType() == Field::Types::String) @@ -537,7 +548,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID } const auto col = type_to_parse->createColumn(); - ReadBufferFromString in_buffer(src.get()); + ReadBufferFromString in_buffer(src.safeGet()); try { type_to_parse->getDefaultSerialization()->deserializeWholeText(*col, in_buffer, FormatSettings{}); @@ -545,9 +556,9 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID catch (Exception & e) { if (e.code() == ErrorCodes::UNEXPECTED_DATA_AFTER_PARSED_VALUE) - throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot convert string '{}' to type {}", src.get(), type.getName()); + throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot convert string '{}' to type {}", src.safeGet(), type.getName()); - e.addMessage(fmt::format("while converting '{}' to {}", src.get(), type.getName())); + e.addMessage(fmt::format("while converting '{}' to {}", src.safeGet(), type.getName())); throw; } @@ -610,7 +621,7 @@ Field convertFieldToTypeOrThrow(const Field & from_value, const IDataType & to_t template static bool decimalEqualsFloat(Field field, Float64 float_value) { - auto decimal_field = field.get>(); + auto decimal_field = field.safeGet>(); auto decimal_to_float = DecimalUtils::convertTo(decimal_field.getValue(), decimal_field.getScale()); return decimal_to_float == float_value; } @@ -629,13 +640,13 @@ std::optional convertFieldToTypeStrict(const Field & from_value, const ID { /// Convert back to Float64 and compare if (result_value.getType() == Field::Types::Decimal32) - return decimalEqualsFloat(result_value, from_value.get()) ? result_value : std::optional{}; + return decimalEqualsFloat(result_value, from_value.safeGet()) ? result_value : std::optional{}; if (result_value.getType() == Field::Types::Decimal64) - return decimalEqualsFloat(result_value, from_value.get()) ? result_value : std::optional{}; + return decimalEqualsFloat(result_value, from_value.safeGet()) ? result_value : std::optional{}; if (result_value.getType() == Field::Types::Decimal128) - return decimalEqualsFloat(result_value, from_value.get()) ? result_value : std::optional{}; + return decimalEqualsFloat(result_value, from_value.safeGet()) ? result_value : std::optional{}; if (result_value.getType() == Field::Types::Decimal256) - return decimalEqualsFloat(result_value, from_value.get()) ? result_value : std::optional{}; + return decimalEqualsFloat(result_value, from_value.safeGet()) ? result_value : std::optional{}; throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown decimal type {}", result_value.getTypeName()); } diff --git a/src/Interpreters/evaluateConstantExpression.cpp b/src/Interpreters/evaluateConstantExpression.cpp index 4bfc80af1fe..d4bb0cc2f8a 100644 --- a/src/Interpreters/evaluateConstantExpression.cpp +++ b/src/Interpreters/evaluateConstantExpression.cpp @@ -297,7 +297,7 @@ namespace { if (tuple_literal->value.getType() == Field::Types::Tuple) { - const auto & tuple = tuple_literal->value.get(); + const auto & tuple = tuple_literal->value.safeGet(); for (const auto & child : tuple) { const auto dnf = analyzeEquals(identifier, child, expr); @@ -792,7 +792,7 @@ std::optional evaluateExpressionOverConstantCondition(const ASTPtr & nod else if (const auto * literal = node->as()) { // Check if it's always true or false. - if (literal->value.getType() == Field::Types::UInt64 && literal->value.get() == 0) + if (literal->value.getType() == Field::Types::UInt64 && literal->value.safeGet() == 0) return {result}; else return {}; diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index fe87eed5570..decc16a3704 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -786,7 +786,7 @@ static std::tuple executeQueryImpl( /// Verify that AST formatting is consistent: /// If you format AST, parse it back, and format it again, you get the same string. - String formatted1 = ast->formatWithPossiblyHidingSensitiveData(0, true, true); + String formatted1 = ast->formatWithPossiblyHidingSensitiveData(0, true, true, false); /// The query can become more verbose after formatting, so: size_t new_max_query_size = max_query_size > 0 ? (1000 + 2 * max_query_size) : 0; @@ -811,7 +811,7 @@ static std::tuple executeQueryImpl( chassert(ast2); - String formatted2 = ast2->formatWithPossiblyHidingSensitiveData(0, true, true); + String formatted2 = ast2->formatWithPossiblyHidingSensitiveData(0, true, true, false); if (formatted1 != formatted2) throw Exception(ErrorCodes::LOGICAL_ERROR, diff --git a/src/Interpreters/formatWithPossiblyHidingSecrets.h b/src/Interpreters/formatWithPossiblyHidingSecrets.h index ea8c295b169..14e84f1d1a4 100644 --- a/src/Interpreters/formatWithPossiblyHidingSecrets.h +++ b/src/Interpreters/formatWithPossiblyHidingSecrets.h @@ -25,7 +25,8 @@ inline String format(const SecretHidingFormatSettings & settings) && settings.ctx->getSettingsRef().format_display_secrets_in_show_and_select && settings.ctx->getAccess()->isGranted(AccessType::displaySecretsInShowAndSelect); - return settings.query.formatWithPossiblyHidingSensitiveData(settings.max_length, settings.one_line, show_secrets); + return settings.query.formatWithPossiblyHidingSensitiveData( + settings.max_length, settings.one_line, show_secrets, settings.ctx->getSettingsRef().print_pretty_type_names); } } diff --git a/src/Interpreters/joinDispatch.h b/src/Interpreters/joinDispatch.h index 54c5c7dc83a..4aabc49c29b 100644 --- a/src/Interpreters/joinDispatch.h +++ b/src/Interpreters/joinDispatch.h @@ -12,38 +12,53 @@ namespace DB { -template +/// HashJoin::MapsOne is more efficient, it only store one row for each key in the map. It is recommended to use it whenever possible. +/// When only need to match only one row from right table, use HashJoin::MapsOne. For example, LEFT ANY/SEMI/ANTI. +/// +/// HashJoin::MapsAll will store all rows for each key in the map. It is used when need to match multiple rows from right table. +/// For example, LEFT ALL, INNER ALL, RIGHT ALL/ANY. +/// +/// prefer_use_maps_all is true when there is mixed inequal condition in the join condition. For example, `t1.a = t2.a AND t1.b > t2.b`. +/// In this case, we need to use HashJoin::MapsAll to store all rows for each key in the map. We will select all matched rows from the map +/// and filter them by `t1.b > t2.b`. +/// +/// flagged indicates whether we need to store flags for each row whether it has been used in the join. See JoinUsedFlags.h. +template struct MapGetter; -template <> struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; -template <> struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; -template <> struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = true; }; -template <> struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = true; }; +template struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; +template struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; +template struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = true; }; +template struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = true; }; -template <> struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; -template <> struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = true; }; -template <> struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; }; -template <> struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; }; +template <> struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; +template <> struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = false; }; +template <> struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; }; +template <> struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = true; }; +template struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; }; +template struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; }; -template <> struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = false; }; -template <> struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = false; }; -template <> struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; }; -template <> struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; }; +template struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = false; }; +template struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = false; }; +template struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; }; +template struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; }; /// Only SEMI LEFT and SEMI RIGHT are valid. INNER and FULL are here for templates instantiation. -template <> struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; -template <> struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; -template <> struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; }; -template <> struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; +template <> struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; +template <> struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = false; }; +template struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; +template struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; }; +template struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; -/// Only SEMI LEFT and SEMI RIGHT are valid. INNER and FULL are here for templates instantiation. -template <> struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; -template <> struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; -template <> struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; }; -template <> struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; +/// Only ANTI LEFT and ANTI RIGHT are valid. INNER and FULL are here for templates instantiation. +template <> struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; +template <> struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; }; +template struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; +template struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; }; +template struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; -template -struct MapGetter { using Map = HashJoin::MapsAsof; static constexpr bool flagged = false; }; +template +struct MapGetter { using Map = HashJoin::MapsAsof; static constexpr bool flagged = false; }; static constexpr std::array STRICTNESSES = { JoinStrictness::RightAny, @@ -62,7 +77,7 @@ static constexpr std::array KINDS = { }; /// Init specified join map -inline bool joinDispatchInit(JoinKind kind, JoinStrictness strictness, HashJoin::MapsVariant & maps) +inline bool joinDispatchInit(JoinKind kind, JoinStrictness strictness, HashJoin::MapsVariant & maps, bool prefer_use_maps_all = false) { return static_for<0, KINDS.size() * STRICTNESSES.size()>([&](auto ij) { @@ -70,7 +85,10 @@ inline bool joinDispatchInit(JoinKind kind, JoinStrictness strictness, HashJoin: constexpr auto j = ij % STRICTNESSES.size(); if (kind == KINDS[i] && strictness == STRICTNESSES[j]) { - maps = typename MapGetter::Map(); + if (prefer_use_maps_all) + maps = typename MapGetter::Map(); + else + maps = typename MapGetter::Map(); return true; } return false; @@ -79,7 +97,7 @@ inline bool joinDispatchInit(JoinKind kind, JoinStrictness strictness, HashJoin: /// Call function on specified join map template -inline bool joinDispatch(JoinKind kind, JoinStrictness strictness, MapsVariant & maps, Func && func) +inline bool joinDispatch(JoinKind kind, JoinStrictness strictness, MapsVariant & maps, bool prefer_use_maps_all, Func && func) { return static_for<0, KINDS.size() * STRICTNESSES.size()>([&](auto ij) { @@ -89,10 +107,16 @@ inline bool joinDispatch(JoinKind kind, JoinStrictness strictness, MapsVariant & constexpr auto j = ij % STRICTNESSES.size(); if (kind == KINDS[i] && strictness == STRICTNESSES[j]) { - func( - std::integral_constant(), - std::integral_constant(), - std::get::Map>(maps)); + if (prefer_use_maps_all) + func( + std::integral_constant(), + std::integral_constant(), + std::get::Map>(maps)); + else + func( + std::integral_constant(), + std::integral_constant(), + std::get::Map>(maps)); return true; } return false; @@ -101,7 +125,7 @@ inline bool joinDispatch(JoinKind kind, JoinStrictness strictness, MapsVariant & /// Call function on specified join map template -inline bool joinDispatch(JoinKind kind, JoinStrictness strictness, std::vector & mapsv, Func && func) +inline bool joinDispatch(JoinKind kind, JoinStrictness strictness, std::vector & mapsv, bool prefer_use_maps_all, Func && func) { return static_for<0, KINDS.size() * STRICTNESSES.size()>([&](auto ij) { @@ -111,17 +135,31 @@ inline bool joinDispatch(JoinKind kind, JoinStrictness strictness, std::vector::Map; - std::vector v; - v.reserve(mapsv.size()); - for (const auto & el : mapsv) - v.push_back(&std::get(*el)); + if (prefer_use_maps_all) + { + using MapType = typename MapGetter::Map; + std::vector v; + v.reserve(mapsv.size()); + for (const auto & el : mapsv) + v.push_back(&std::get(*el)); - func( - std::integral_constant(), - std::integral_constant(), - v - /*std::get::Map>(maps)*/); + func( + std::integral_constant(), std::integral_constant(), v + /*std::get::Map>(maps)*/); + } + else + { + using MapType = typename MapGetter::Map; + std::vector v; + v.reserve(mapsv.size()); + for (const auto & el : mapsv) + v.push_back(&std::get(*el)); + + func( + std::integral_constant(), std::integral_constant(), v + /*std::get::Map>(maps)*/); + + } return true; } return false; diff --git a/src/Interpreters/parseColumnsListForTableFunction.cpp b/src/Interpreters/parseColumnsListForTableFunction.cpp index b9fdaabede1..0c6d18dca70 100644 --- a/src/Interpreters/parseColumnsListForTableFunction.cpp +++ b/src/Interpreters/parseColumnsListForTableFunction.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -30,6 +31,7 @@ DataTypeValidationSettings::DataTypeValidationSettings(const DB::Settings& setti , allow_suspicious_variant_types(settings.allow_suspicious_variant_types) , validate_nested_types(settings.validate_experimental_and_suspicious_types_inside_nested_types) , allow_experimental_dynamic_type(settings.allow_experimental_dynamic_type) + , allow_experimental_json_type(settings.allow_experimental_json_type) { } @@ -123,7 +125,7 @@ void validateDataType(const DataTypePtr & type_to_check, const DataTypeValidatio if (!settings.allow_experimental_dynamic_type) { - if (data_type.hasDynamicSubcolumns()) + if (isDynamic(data_type)) { throw Exception( ErrorCodes::ILLEGAL_COLUMN, @@ -132,6 +134,19 @@ void validateDataType(const DataTypePtr & type_to_check, const DataTypeValidatio data_type.getName()); } } + + if (!settings.allow_experimental_json_type) + { + const auto * object_type = typeid_cast(&data_type); + if (object_type && object_type->getSchemaFormat() == DataTypeObject::SchemaFormat::JSON) + { + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Cannot create column with type '{}' because experimental JSON type is not allowed. " + "Set setting allow_experimental_json_type = 1 in order to allow it", + data_type.getName()); + } + } }; validate_callback(*type_to_check); diff --git a/src/Interpreters/parseColumnsListForTableFunction.h b/src/Interpreters/parseColumnsListForTableFunction.h index 6f15c585e4f..6e00492c0ad 100644 --- a/src/Interpreters/parseColumnsListForTableFunction.h +++ b/src/Interpreters/parseColumnsListForTableFunction.h @@ -23,6 +23,7 @@ struct DataTypeValidationSettings bool allow_suspicious_variant_types = true; bool validate_nested_types = true; bool allow_experimental_dynamic_type = true; + bool allow_experimental_json_type = true; }; void validateDataType(const DataTypePtr & type, const DataTypeValidationSettings & settings); diff --git a/src/Interpreters/replaceForPositionalArguments.cpp b/src/Interpreters/replaceForPositionalArguments.cpp index 3d60723a167..ee967f45c74 100644 --- a/src/Interpreters/replaceForPositionalArguments.cpp +++ b/src/Interpreters/replaceForPositionalArguments.cpp @@ -35,11 +35,11 @@ bool replaceForPositionalArguments(ASTPtr & argument, const ASTSelectQuery * sel if (which == Field::Types::UInt64) { - pos = ast_literal->value.get(); + pos = ast_literal->value.safeGet(); } else if (which == Field::Types::Int64) { - auto value = ast_literal->value.get(); + auto value = ast_literal->value.safeGet(); if (value > 0) pos = value; else diff --git a/src/Interpreters/tests/gtest_comparison_graph.cpp b/src/Interpreters/tests/gtest_comparison_graph.cpp index ac24a8de368..5f93bb983c1 100644 --- a/src/Interpreters/tests/gtest_comparison_graph.cpp +++ b/src/Interpreters/tests/gtest_comparison_graph.cpp @@ -29,7 +29,7 @@ TEST(ComparisonGraph, Bounds) const auto & [lower, strict] = *res; - ASSERT_EQ(lower.get(), 3); + ASSERT_EQ(lower.safeGet(), 3); ASSERT_TRUE(strict); } @@ -39,7 +39,7 @@ TEST(ComparisonGraph, Bounds) const auto & [upper, strict] = *res; - ASSERT_EQ(upper.get(), 7); + ASSERT_EQ(upper.safeGet(), 7); ASSERT_TRUE(strict); } diff --git a/src/Interpreters/tests/gtest_filecache.cpp b/src/Interpreters/tests/gtest_filecache.cpp index 36acc319f4e..bb3df734b2a 100644 --- a/src/Interpreters/tests/gtest_filecache.cpp +++ b/src/Interpreters/tests/gtest_filecache.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -42,6 +43,7 @@ #include #include +using namespace std::chrono_literals; namespace fs = std::filesystem; using namespace DB; @@ -246,7 +248,8 @@ void download(FileSegment & file_segment) ASSERT_EQ(file_segment.state(), State::DOWNLOADING); ASSERT_EQ(file_segment.getDownloadedSize(), 0); - ASSERT_TRUE(file_segment.reserve(file_segment.range().size(), 1000)); + std::string failure_reason; + ASSERT_TRUE(file_segment.reserve(file_segment.range().size(), 1000, failure_reason)); download(cache_base_path, file_segment); ASSERT_EQ(file_segment.state(), State::DOWNLOADING); @@ -258,7 +261,8 @@ void assertDownloadFails(FileSegment & file_segment) { ASSERT_EQ(file_segment.getOrSetDownloader(), FileSegment::getCallerId()); ASSERT_EQ(file_segment.getDownloadedSize(), 0); - ASSERT_FALSE(file_segment.reserve(file_segment.range().size(), 1000)); + std::string failure_reason; + ASSERT_FALSE(file_segment.reserve(file_segment.range().size(), 1000, failure_reason)); file_segment.complete(); } @@ -358,9 +362,11 @@ TEST_F(FileCacheTest, LRUPolicy) settings.max_size = 30; settings.max_elements = 5; settings.boundary_alignment = 1; + settings.load_metadata_asynchronously = false; const size_t file_size = INT_MAX; // the value doesn't really matter because boundary_alignment == 1. + const auto user = FileCache::getCommonUser(); { std::cerr << "Step 1\n"; @@ -815,6 +821,7 @@ TEST_F(FileCacheTest, writeBuffer) settings.max_elements = 5; settings.max_file_segment_size = 5; settings.base_path = cache_base_path; + settings.load_metadata_asynchronously = false; FileCache cache("6", settings); cache.initialize(); @@ -946,6 +953,7 @@ TEST_F(FileCacheTest, temporaryData) settings.max_size = 10_KiB; settings.max_file_segment_size = 1_KiB; settings.base_path = cache_base_path; + settings.load_metadata_asynchronously = false; DB::FileCache file_cache("7", settings); file_cache.initialize(); @@ -957,10 +965,11 @@ TEST_F(FileCacheTest, temporaryData) { ASSERT_EQ(some_data_holder->size(), 5); + std::string failure_reason; for (auto & segment : *some_data_holder) { ASSERT_TRUE(segment->getOrSetDownloader() == DB::FileSegment::getCallerId()); - ASSERT_TRUE(segment->reserve(segment->range().size(), 1000)); + ASSERT_TRUE(segment->reserve(segment->range().size(), 1000, failure_reason)); download(*segment); segment->complete(); } @@ -1073,6 +1082,7 @@ TEST_F(FileCacheTest, CachedReadBuffer) settings.max_size = 30; settings.max_elements = 10; settings.boundary_alignment = 1; + settings.load_metadata_asynchronously = false; ReadSettings read_settings; read_settings.enable_filesystem_cache = true; @@ -1092,6 +1102,7 @@ TEST_F(FileCacheTest, CachedReadBuffer) auto cache = std::make_shared("8", settings); cache->initialize(); + auto key = cache->createKeyForPath(file_path); const auto user = FileCache::getCommonUser(); @@ -1132,6 +1143,7 @@ TEST_F(FileCacheTest, TemporaryDataReadBufferSize) settings.max_size = 10_KiB; settings.max_file_segment_size = 1_KiB; settings.base_path = cache_base_path; + settings.load_metadata_asynchronously = false; DB::FileCache file_cache("cache", settings); file_cache.initialize(); @@ -1195,6 +1207,7 @@ TEST_F(FileCacheTest, SLRUPolicy) settings.max_size = 40; settings.max_elements = 6; settings.boundary_alignment = 1; + settings.load_metadata_asynchronously = false; settings.cache_policy = "SLRU"; settings.slru_size_ratio = 0.5; @@ -1307,6 +1320,7 @@ TEST_F(FileCacheTest, SLRUPolicy) settings2.boundary_alignment = 1; settings2.cache_policy = "SLRU"; settings2.slru_size_ratio = 0.5; + settings.load_metadata_asynchronously = false; auto cache = std::make_shared("slru_2", settings2); cache->initialize(); diff --git a/src/Parsers/ASTColumnDeclaration.cpp b/src/Parsers/ASTColumnDeclaration.cpp index c96499095d5..23d653012f8 100644 --- a/src/Parsers/ASTColumnDeclaration.cpp +++ b/src/Parsers/ASTColumnDeclaration.cpp @@ -66,17 +66,13 @@ void ASTColumnDeclaration::formatImpl(const FormatSettings & format_settings, Fo { frame.need_parens = false; - /// We have to always backquote column names to avoid ambiguouty with INDEX and other declarations in CREATE query. + /// We have to always backquote column names to avoid ambiguity with INDEX and other declarations in CREATE query. format_settings.ostr << backQuote(name); if (type) { format_settings.ostr << ' '; - - FormatStateStacked type_frame = frame; - type_frame.indent = 0; - - type->formatImpl(format_settings, state, type_frame); + type->formatImpl(format_settings, state, frame); } if (null_modifier) diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp index f0f782c0a63..d7f5b8f9702 100644 --- a/src/Parsers/ASTCreateQuery.cpp +++ b/src/Parsers/ASTCreateQuery.cpp @@ -256,6 +256,8 @@ ASTPtr ASTCreateQuery::clone() const res->set(res->dictionary, dictionary->clone()); } + if (refresh_strategy) + res->set(res->refresh_strategy, refresh_strategy->clone()); if (as_table_function) res->set(res->as_table_function, as_table_function->clone()); if (comment) @@ -483,6 +485,13 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat if (auto to_storage = getTargetInnerEngine(ViewTarget::To)) to_storage->formatImpl(settings, state, frame); + if (targets) + { + targets->formatTarget(ViewTarget::Data, settings, state, frame); + targets->formatTarget(ViewTarget::Tags, settings, state, frame); + targets->formatTarget(ViewTarget::Metrics, settings, state, frame); + } + if (dictionary) dictionary->formatImpl(settings, state, frame); diff --git a/src/Parsers/ASTCreateQuery.h b/src/Parsers/ASTCreateQuery.h index a95010aea31..6be0fa78903 100644 --- a/src/Parsers/ASTCreateQuery.h +++ b/src/Parsers/ASTCreateQuery.h @@ -97,6 +97,7 @@ public: bool is_materialized_view{false}; bool is_live_view{false}; bool is_window_view{false}; + bool is_time_series_table{false}; /// CREATE TABLE ... ENGINE=TimeSeries() ... bool is_populate{false}; bool is_create_empty{false}; /// CREATE TABLE ... EMPTY AS SELECT ... bool replace_view{false}; /// CREATE OR REPLACE VIEW diff --git a/src/Parsers/ASTDataType.cpp b/src/Parsers/ASTDataType.cpp index 3c17ae8c380..4211347fb74 100644 --- a/src/Parsers/ASTDataType.cpp +++ b/src/Parsers/ASTDataType.cpp @@ -40,12 +40,22 @@ void ASTDataType::formatImpl(const FormatSettings & settings, FormatState & stat { settings.ostr << '(' << (settings.hilite ? hilite_none : ""); - for (size_t i = 0, size = arguments->children.size(); i < size; ++i) + if (!settings.one_line && settings.print_pretty_type_names && name == "Tuple") { - if (i != 0) - settings.ostr << ", "; - - arguments->children[i]->formatImpl(settings, state, frame); + ++frame.indent; + std::string indent_str = settings.one_line ? "" : "\n" + std::string(4 * frame.indent, ' '); + for (size_t i = 0, size = arguments->children.size(); i < size; ++i) + { + if (i != 0) + settings.ostr << ','; + settings.ostr << indent_str; + arguments->children[i]->formatImpl(settings, state, frame); + } + } + else + { + frame.expression_list_prepend_whitespace = false; + arguments->formatImpl(settings, state, frame); } settings.ostr << (settings.hilite ? hilite_function : "") << ')'; diff --git a/src/Parsers/ASTExpressionList.cpp b/src/Parsers/ASTExpressionList.cpp index 61ac482af82..f345b0c6a6f 100644 --- a/src/Parsers/ASTExpressionList.cpp +++ b/src/Parsers/ASTExpressionList.cpp @@ -42,7 +42,8 @@ void ASTExpressionList::formatImpl(const FormatSettings & settings, FormatState void ASTExpressionList::formatImplMultiline(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { - std::string indent_str = "\n" + std::string(4 * (frame.indent + 1), ' '); + ++frame.indent; + std::string indent_str = "\n" + std::string(4 * frame.indent, ' '); if (frame.expression_list_prepend_whitespace) { @@ -50,8 +51,6 @@ void ASTExpressionList::formatImplMultiline(const FormatSettings & settings, For settings.ostr << ' '; } - ++frame.indent; - for (size_t i = 0, size = children.size(); i < size; ++i) { if (i && separator) diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index cd9e910d45a..d42728addb7 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -522,7 +522,7 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format if (tuple_arguments_valid && lit_right) { if (isInt64OrUInt64FieldType(lit_right->value.getType()) - && lit_right->value.get() >= 0) + && lit_right->value.safeGet() >= 0) { if (frame.need_parens) settings.ostr << '('; diff --git a/src/Parsers/ASTIndexDeclaration.h b/src/Parsers/ASTIndexDeclaration.h index dd05ad08184..72f3f017a99 100644 --- a/src/Parsers/ASTIndexDeclaration.h +++ b/src/Parsers/ASTIndexDeclaration.h @@ -13,8 +13,7 @@ class ASTIndexDeclaration : public IAST { public: static const auto DEFAULT_INDEX_GRANULARITY = 1uz; - static const auto DEFAULT_ANNOY_INDEX_GRANULARITY = 100'000'000uz; - static const auto DEFAULT_USEARCH_INDEX_GRANULARITY = 100'000'000uz; + static const auto DEFAULT_VECTOR_SIMILARITY_INDEX_GRANULARITY = 100'000'000uz; ASTIndexDeclaration(ASTPtr expression, ASTPtr type, const String & name_); diff --git a/src/Parsers/ASTLiteral.cpp b/src/Parsers/ASTLiteral.cpp index 8dedc5dc95d..515f4f0cb9f 100644 --- a/src/Parsers/ASTLiteral.cpp +++ b/src/Parsers/ASTLiteral.cpp @@ -73,8 +73,8 @@ void ASTLiteral::appendColumnNameImpl(WriteBuffer & ostr) const /// Special case for very large arrays and tuples. Instead of listing all elements, will use hash of them. /// (Otherwise column name will be too long, that will lead to significant slowdown of expression analysis.) auto type = value.getType(); - if ((type == Field::Types::Array && value.get().size() > min_elements_for_hashing) - || (type == Field::Types::Tuple && value.get().size() > min_elements_for_hashing)) + if ((type == Field::Types::Array && value.safeGet().size() > min_elements_for_hashing) + || (type == Field::Types::Tuple && value.safeGet().size() > min_elements_for_hashing)) { SipHash hash; applyVisitor(FieldVisitorHash(hash), value); @@ -92,7 +92,7 @@ void ASTLiteral::appendColumnNameImpl(WriteBuffer & ostr) const /// for tons of literals as it creates temporary String. if (value.getType() == Field::Types::String) { - writeQuoted(value.get(), ostr); + writeQuoted(value.safeGet(), ostr); } else { @@ -110,7 +110,7 @@ void ASTLiteral::appendColumnNameImplLegacy(WriteBuffer & ostr) const /// Special case for very large arrays. Instead of listing all elements, will use hash of them. /// (Otherwise column name will be too long, that will lead to significant slowdown of expression analysis.) auto type = value.getType(); - if ((type == Field::Types::Array && value.get().size() > min_elements_for_hashing)) + if ((type == Field::Types::Array && value.safeGet().size() > min_elements_for_hashing)) { SipHash hash; applyVisitor(FieldVisitorHash(hash), value); diff --git a/src/Parsers/ASTNameTypePair.cpp b/src/Parsers/ASTNameTypePair.cpp index e4066081a9b..1515700365f 100644 --- a/src/Parsers/ASTNameTypePair.cpp +++ b/src/Parsers/ASTNameTypePair.cpp @@ -23,12 +23,8 @@ ASTPtr ASTNameTypePair::clone() const void ASTNameTypePair::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { - std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); - - settings.ostr << indent_str << backQuoteIfNeed(name) << ' '; + settings.ostr << backQuoteIfNeed(name) << ' '; type->formatImpl(settings, state, frame); } } - - diff --git a/src/Parsers/ASTObjectTypeArgument.cpp b/src/Parsers/ASTObjectTypeArgument.cpp new file mode 100644 index 00000000000..975f0389505 --- /dev/null +++ b/src/Parsers/ASTObjectTypeArgument.cpp @@ -0,0 +1,64 @@ +#include +#include +#include + + +namespace DB +{ + +ASTPtr ASTObjectTypeArgument::clone() const +{ + auto res = std::make_shared(*this); + res->children.clear(); + + if (path_with_type) + { + res->path_with_type = path_with_type->clone(); + res->children.push_back(res->path_with_type); + } + else if (skip_path) + { + res->skip_path = skip_path->clone(); + res->children.push_back(res->skip_path); + } + else if (skip_path_regexp) + { + res->skip_path_regexp = skip_path_regexp->clone(); + res->children.push_back(res->skip_path_regexp); + } + else if (parameter) + { + res->parameter = parameter->clone(); + res->children.push_back(res->parameter); + } + + return res; +} + +void ASTObjectTypeArgument::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const +{ + if (path_with_type) + { + path_with_type->formatImpl(settings, state, frame); + } + else if (parameter) + { + parameter->formatImpl(settings, state, frame); + } + else if (skip_path) + { + std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); + settings.ostr << indent_str << "SKIP" << ' '; + skip_path->formatImpl(settings, state, frame); + } + else if (skip_path_regexp) + { + std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); + settings.ostr << indent_str << "SKIP REGEXP" << ' '; + skip_path_regexp->formatImpl(settings, state, frame); + } +} + +} + + diff --git a/src/Parsers/ASTObjectTypeArgument.h b/src/Parsers/ASTObjectTypeArgument.h new file mode 100644 index 00000000000..ab18d00d770 --- /dev/null +++ b/src/Parsers/ASTObjectTypeArgument.h @@ -0,0 +1,33 @@ +#pragma once + +#include + + +namespace DB +{ + +/** An argument of Object data type declaration (for example for JSON). Can contain one of: + * - pair (path, data type) + * - path that should be skipped + * - path regexp for paths that should be skipped + * - setting in a form of `setting=N` + */ +class ASTObjectTypeArgument : public IAST +{ +public: + ASTPtr path_with_type; + ASTPtr skip_path; + ASTPtr skip_path_regexp; + ASTPtr parameter; + + /** Get the text that identifies this element. */ + String getID(char) const override { return "ASTObjectTypeArgument"; } + ASTPtr clone() const override; + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; +}; + + +} + diff --git a/src/Parsers/ASTRefreshStrategy.cpp b/src/Parsers/ASTRefreshStrategy.cpp index 2e0c6ee4638..d10c1b4e7f5 100644 --- a/src/Parsers/ASTRefreshStrategy.cpp +++ b/src/Parsers/ASTRefreshStrategy.cpp @@ -20,7 +20,6 @@ ASTPtr ASTRefreshStrategy::clone() const res->set(res->settings, settings->clone()); if (dependencies) res->set(res->dependencies, dependencies->clone()); - res->schedule_kind = schedule_kind; return res; } @@ -66,6 +65,8 @@ void ASTRefreshStrategy::formatImpl( f_settings.ostr << (f_settings.hilite ? hilite_keyword : "") << " SETTINGS " << (f_settings.hilite ? hilite_none : ""); settings->formatImpl(f_settings, state, frame); } + if (append) + f_settings.ostr << (f_settings.hilite ? hilite_keyword : "") << " APPEND" << (f_settings.hilite ? hilite_none : ""); } } diff --git a/src/Parsers/ASTRefreshStrategy.h b/src/Parsers/ASTRefreshStrategy.h index ca248b76b40..bb5ac97c054 100644 --- a/src/Parsers/ASTRefreshStrategy.h +++ b/src/Parsers/ASTRefreshStrategy.h @@ -24,6 +24,7 @@ public: ASTTimeInterval * offset = nullptr; ASTTimeInterval * spread = nullptr; RefreshScheduleKind schedule_kind{RefreshScheduleKind::UNKNOWN}; + bool append = false; String getID(char) const override { return "Refresh strategy definition"; } diff --git a/src/Parsers/ASTRenameQuery.h b/src/Parsers/ASTRenameQuery.h index d51c382f374..39fc4f787ec 100644 --- a/src/Parsers/ASTRenameQuery.h +++ b/src/Parsers/ASTRenameQuery.h @@ -141,6 +141,19 @@ public: QueryKind getQueryKind() const override { return QueryKind::Rename; } + void addElement(const String & from_db, const String & from_table, const String & to_db, const String & to_table) + { + auto identifier = [&](const String & name) -> ASTPtr + { + if (name.empty()) + return nullptr; + ASTPtr ast = std::make_shared(name); + children.push_back(ast); + return ast; + }; + elements.push_back(Element {.from = Table {.database = identifier(from_db), .table = identifier(from_table)}, .to = Table {.database = identifier(to_db), .table = identifier(to_table)}}); + } + protected: void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override { diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp index a730ea0ba3d..b5e5e0f208d 100644 --- a/src/Parsers/ASTSystemQuery.cpp +++ b/src/Parsers/ASTSystemQuery.cpp @@ -198,6 +198,29 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState & s print_database_table(); } + if (sync_replica_mode != SyncReplicaMode::DEFAULT) + { + settings.ostr << ' '; + print_keyword(magic_enum::enum_name(sync_replica_mode)); + + // If the mode is LIGHTWEIGHT and specific source replicas are specified + if (sync_replica_mode == SyncReplicaMode::LIGHTWEIGHT && !src_replicas.empty()) + { + settings.ostr << ' '; + print_keyword("FROM"); + settings.ostr << ' '; + + bool first = true; + for (const auto & src : src_replicas) + { + if (!first) + settings.ostr << ", "; + first = false; + settings.ostr << quoteString(src); + } + } + } + if (query_settings) { settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << "SETTINGS " << (settings.hilite ? hilite_none : ""); @@ -233,28 +256,6 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState & s print_identifier(disk); } - if (sync_replica_mode != SyncReplicaMode::DEFAULT) - { - settings.ostr << ' '; - print_keyword(magic_enum::enum_name(sync_replica_mode)); - - // If the mode is LIGHTWEIGHT and specific source replicas are specified - if (sync_replica_mode == SyncReplicaMode::LIGHTWEIGHT && !src_replicas.empty()) - { - settings.ostr << ' '; - print_keyword("FROM"); - settings.ostr << ' '; - - bool first = true; - for (const auto & src : src_replicas) - { - if (!first) - settings.ostr << ", "; - first = false; - settings.ostr << quoteString(src); - } - } - } break; } case Type::SYNC_DATABASE_REPLICA: @@ -375,6 +376,7 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState & s case Type::START_VIEW: case Type::STOP_VIEW: case Type::CANCEL_VIEW: + case Type::WAIT_VIEW: { settings.ostr << ' '; print_database_table(); diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index 167e724dcee..d9f5b425182 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -95,6 +95,7 @@ public: START_CLEANUP, RESET_COVERAGE, REFRESH_VIEW, + WAIT_VIEW, START_VIEW, START_VIEWS, STOP_VIEW, @@ -130,6 +131,8 @@ public: String disk; UInt64 seconds{}; + std::optional query_cache_tag; + String filesystem_cache_name; std::string key_to_drop; std::optional offset_to_drop; diff --git a/src/Parsers/ASTViewTargets.cpp b/src/Parsers/ASTViewTargets.cpp index 8ee98e704df..ffd746cc38a 100644 --- a/src/Parsers/ASTViewTargets.cpp +++ b/src/Parsers/ASTViewTargets.cpp @@ -21,6 +21,9 @@ std::string_view toString(ViewTarget::Kind kind) { case ViewTarget::To: return "to"; case ViewTarget::Inner: return "inner"; + case ViewTarget::Data: return "data"; + case ViewTarget::Tags: return "tags"; + case ViewTarget::Metrics: return "metrics"; } throw Exception(ErrorCodes::LOGICAL_ERROR, "{} doesn't support kind {}", __FUNCTION__, kind); } @@ -254,6 +257,9 @@ std::optional ASTViewTargets::getKeywordForTableID(ViewTarget::Kind kin { case ViewTarget::To: return Keyword::TO; /// TO mydb.mydata case ViewTarget::Inner: return std::nullopt; + case ViewTarget::Data: return Keyword::DATA; /// DATA mydb.mydata + case ViewTarget::Tags: return Keyword::TAGS; /// TAGS mydb.mytags + case ViewTarget::Metrics: return Keyword::METRICS; /// METRICS mydb.mymetrics } UNREACHABLE(); } @@ -264,6 +270,9 @@ std::optional ASTViewTargets::getKeywordForInnerStorage(ViewTarget::Kin { case ViewTarget::To: return std::nullopt; /// ENGINE = MergeTree() case ViewTarget::Inner: return Keyword::INNER; /// INNER ENGINE = MergeTree() + case ViewTarget::Data: return Keyword::DATA; /// DATA ENGINE = MergeTree() + case ViewTarget::Tags: return Keyword::TAGS; /// TAGS ENGINE = MergeTree() + case ViewTarget::Metrics: return Keyword::METRICS; /// METRICS ENGINE = MergeTree() } UNREACHABLE(); } @@ -274,6 +283,9 @@ std::optional ASTViewTargets::getKeywordForInnerUUID(ViewTarget::Kind k { case ViewTarget::To: return Keyword::TO_INNER_UUID; /// TO INNER UUID 'XXX' case ViewTarget::Inner: return std::nullopt; + case ViewTarget::Data: return Keyword::DATA_INNER_UUID; /// DATA INNER UUID 'XXX' + case ViewTarget::Tags: return Keyword::TAGS_INNER_UUID; /// TAGS INNER UUID 'XXX' + case ViewTarget::Metrics: return Keyword::METRICS_INNER_UUID; /// METRICS INNER UUID 'XXX' } UNREACHABLE(); } diff --git a/src/Parsers/ASTViewTargets.h b/src/Parsers/ASTViewTargets.h index 12182919f0e..7814dd5249c 100644 --- a/src/Parsers/ASTViewTargets.h +++ b/src/Parsers/ASTViewTargets.h @@ -9,7 +9,7 @@ namespace DB class ASTStorage; enum class Keyword : size_t; -/// Information about target tables (external or inner) of a materialized view or a window view. +/// Information about target tables (external or inner) of a materialized view or a window view or a TimeSeries table. /// See ASTViewTargets for more details. struct ViewTarget { @@ -24,6 +24,15 @@ struct ViewTarget /// If `kind == ViewTarget::Inner` then `ViewTarget` contains information about the "INNER" table of a window view: /// CREATE WINDOW VIEW db.wv_name {INNER ENGINE inner_engine} AS SELECT ... Inner, + + /// The "data" table for a TimeSeries table, contains time series. + Data, + + /// The "tags" table for a TimeSeries table, contains identifiers for each combination of a metric name and tags (labels). + Tags, + + /// The "metrics" table for a TimeSeries table, contains general information (metadata) about metrics. + Metrics, }; Kind kind = To; diff --git a/src/Parsers/Access/ASTGrantQuery.cpp b/src/Parsers/Access/ASTGrantQuery.cpp index f60fa7e4a23..eac88c75513 100644 --- a/src/Parsers/Access/ASTGrantQuery.cpp +++ b/src/Parsers/Access/ASTGrantQuery.cpp @@ -97,24 +97,9 @@ namespace void formatCurrentGrantsElements(const AccessRightsElements & elements, const IAST::FormatSettings & settings) { - for (size_t i = 0; i != elements.size(); ++i) - { - const auto & element = elements[i]; - - bool next_element_on_same_db_and_table = false; - if (i != elements.size() - 1) - { - const auto & next_element = elements[i + 1]; - if (element.sameDatabaseAndTableAndParameter(next_element)) - next_element_on_same_db_and_table = true; - } - - if (!next_element_on_same_db_and_table) - { - settings.ostr << " "; - formatONClause(element, settings); - } - } + settings.ostr << "("; + formatElementsWithoutOptions(elements, settings); + settings.ostr << ")"; } } diff --git a/src/Parsers/Access/ParserCreateQuotaQuery.cpp b/src/Parsers/Access/ParserCreateQuotaQuery.cpp index ddfdbe38903..ddf4e9ecda5 100644 --- a/src/Parsers/Access/ParserCreateQuotaQuery.cpp +++ b/src/Parsers/Access/ParserCreateQuotaQuery.cpp @@ -114,7 +114,7 @@ namespace T fieldToNumber(const Field & f) { if (f.getType() == Field::Types::String) - return parseWithSizeSuffix(boost::algorithm::trim_copy(f.get())); + return parseWithSizeSuffix(boost::algorithm::trim_copy(f.safeGet())); else return applyVisitor(FieldVisitorConvertToNumber(), f); } diff --git a/src/Parsers/CommonParsers.h b/src/Parsers/CommonParsers.h index 0ae9ee4833c..ab0e70eb0e5 100644 --- a/src/Parsers/CommonParsers.h +++ b/src/Parsers/CommonParsers.h @@ -116,6 +116,8 @@ namespace DB MR_MACROS(CURRENT_TRANSACTION, "CURRENT TRANSACTION") \ MR_MACROS(CURRENTUSER, "CURRENTUSER") \ MR_MACROS(D, "D") \ + MR_MACROS(DATA, "DATA") \ + MR_MACROS(DATA_INNER_UUID, "DATA INNER UUID") \ MR_MACROS(DATABASE, "DATABASE") \ MR_MACROS(DATABASES, "DATABASES") \ MR_MACROS(DATE, "DATE") \ @@ -288,6 +290,8 @@ namespace DB MR_MACROS(MCS, "MCS") \ MR_MACROS(MEMORY, "MEMORY") \ MR_MACROS(MERGES, "MERGES") \ + MR_MACROS(METRICS, "METRICS") \ + MR_MACROS(METRICS_INNER_UUID, "METRICS INNER UUID") \ MR_MACROS(MI, "MI") \ MR_MACROS(MICROSECOND, "MICROSECOND") \ MR_MACROS(MICROSECONDS, "MICROSECONDS") \ @@ -367,6 +371,7 @@ namespace DB MR_MACROS(POPULATE, "POPULATE") \ MR_MACROS(PRECEDING, "PRECEDING") \ MR_MACROS(PRECISION, "PRECISION") \ + MR_MACROS(PREFIX, "PREFIX") \ MR_MACROS(PREWHERE, "PREWHERE") \ MR_MACROS(PRIMARY_KEY, "PRIMARY KEY") \ MR_MACROS(PRIMARY, "PRIMARY") \ @@ -445,6 +450,7 @@ namespace DB MR_MACROS(SHOW, "SHOW") \ MR_MACROS(SIGNED, "SIGNED") \ MR_MACROS(SIMPLE, "SIMPLE") \ + MR_MACROS(SKIP, "SKIP") \ MR_MACROS(SOURCE, "SOURCE") \ MR_MACROS(SPATIAL, "SPATIAL") \ MR_MACROS(SQL_SECURITY, "SQL SECURITY") \ @@ -464,6 +470,9 @@ namespace DB MR_MACROS(TABLE_OVERRIDE, "TABLE OVERRIDE") \ MR_MACROS(TABLE, "TABLE") \ MR_MACROS(TABLES, "TABLES") \ + MR_MACROS(TAG, "TAG") \ + MR_MACROS(TAGS, "TAGS") \ + MR_MACROS(TAGS_INNER_UUID, "TAGS INNER UUID") \ MR_MACROS(TEMPORARY_TABLE, "TEMPORARY TABLE") \ MR_MACROS(TEMPORARY, "TEMPORARY") \ MR_MACROS(TEST, "TEST") \ @@ -635,6 +644,32 @@ protected: } }; +class ParserTokenSequence : public IParserBase +{ +private: + std::vector token_types; +public: + ParserTokenSequence(const std::vector & token_types_) : token_types(token_types_) {} /// NOLINT + +protected: + const char * getName() const override { return "token sequence"; } + + bool parseImpl(Pos & pos, ASTPtr & /*node*/, Expected & expected) override + { + for (auto token_type : token_types) + { + if (pos->type != token_type) + { + expected.add(pos, getTokenName(token_type)); + return false; + } + + ++pos; + } + + return true; + } +}; // Parser always returns true and do nothing. class ParserNothing : public IParserBase diff --git a/src/Parsers/CreateQueryUUIDs.cpp b/src/Parsers/CreateQueryUUIDs.cpp index 4dfee67b537..fbdc6161408 100644 --- a/src/Parsers/CreateQueryUUIDs.cpp +++ b/src/Parsers/CreateQueryUUIDs.cpp @@ -45,6 +45,13 @@ CreateQueryUUIDs::CreateQueryUUIDs(const ASTCreateQuery & query, bool generate_r /// then MV will create inner table. We should generate UUID of inner table here. if (query.is_materialized_view) generate_target_uuid(ViewTarget::To); + + if (query.is_time_series_table) + { + generate_target_uuid(ViewTarget::Data); + generate_target_uuid(ViewTarget::Tags); + generate_target_uuid(ViewTarget::Metrics); + } } } } diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 9927acdcf17..61b5723072e 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -282,22 +282,106 @@ bool ParserTableAsStringLiteralIdentifier::parseImpl(Pos & pos, ASTPtr & node, E return true; } +namespace +{ + +/// Parser of syntax sugar for reading JSON subcolumns of type Array(JSON): +/// json.a.b[][].c -> json.a.b.:Array(Array(JSON)).c +class ParserArrayOfJSONIdentifierAddition : public IParserBase +{ +public: + String getLastArrayOfJSONSubcolumnIdentifier() const + { + String subcolumn = ":`"; + for (size_t i = 0; i != last_array_level; ++i) + subcolumn += "Array("; + subcolumn += "JSON"; + for (size_t i = 0; i != last_array_level; ++i) + subcolumn += ")"; + return subcolumn + "`"; + } + +protected: + const char * getName() const override { return "ParserArrayOfJSONIdentifierDelimiter"; } + + bool parseImpl(Pos & pos, ASTPtr & /*node*/, Expected & expected) override + { + last_array_level = 0; + ParserTokenSequence brackets_parser(std::vector{TokenType::OpeningSquareBracket, TokenType::ClosingSquareBracket}); + if (!brackets_parser.check(pos, expected)) + return false; + ++last_array_level; + while (brackets_parser.check(pos, expected)) + ++last_array_level; + return true; + } + +private: + size_t last_array_level; +}; + +} bool ParserCompoundIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - ASTPtr id_list; - if (!ParserList(std::make_unique(allow_query_parameter, highlight_type), std::make_unique(TokenType::Dot), false) - .parse(pos, id_list, expected)) - return false; + auto element_parser = std::make_unique(allow_query_parameter, highlight_type); + std::vector> delimiter_parsers; + delimiter_parsers.emplace_back(std::make_unique(std::vector{TokenType::Dot, TokenType::Colon}), SpecialDelimiter::JSON_PATH_DYNAMIC_TYPE); + delimiter_parsers.emplace_back(std::make_unique(std::vector{TokenType::Dot, TokenType::Caret}), SpecialDelimiter::JSON_PATH_PREFIX); + delimiter_parsers.emplace_back(std::make_unique(TokenType::Dot), SpecialDelimiter::NONE); + ParserArrayOfJSONIdentifierAddition array_of_json_identifier_addition; std::vector parts; + SpecialDelimiter last_special_delimiter = SpecialDelimiter::NONE; ASTs params; - const auto & list = id_list->as(); - for (const auto & child : list.children) + + bool is_first = true; + Pos begin = pos; + while (true) { - parts.emplace_back(getIdentifierName(child)); + ASTPtr element; + if (!element_parser->parse(pos, element, expected)) + { + if (is_first) + return false; + pos = begin; + break; + } + + if (last_special_delimiter != SpecialDelimiter::NONE) + { + parts.push_back(static_cast(last_special_delimiter) + backQuote(getIdentifierName(element))); + } + else + { + parts.push_back(getIdentifierName(element)); + /// Check if we have Array of JSON subcolumn additioon after identifier + /// and replace it with corresponding type subcolumn. + if (!is_first && array_of_json_identifier_addition.check(pos, expected)) + parts.push_back(array_of_json_identifier_addition.getLastArrayOfJSONSubcolumnIdentifier()); + } + if (parts.back().empty()) - params.push_back(child->as()->getParam()); + params.push_back(element->as()->getParam()); + + is_first = false; + begin = pos; + bool parsed_delimiter = false; + for (const auto & [parser, special_delimiter] : delimiter_parsers) + { + if (parser->check(pos, expected)) + { + parsed_delimiter = true; + last_special_delimiter = special_delimiter; + break; + } + } + + if (!parsed_delimiter) + { + pos = begin; + break; + } } ParserKeyword s_uuid(Keyword::UUID); @@ -314,7 +398,7 @@ bool ParserCompoundIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected & ex ASTPtr ast_uuid; if (!uuid_p.parse(pos, ast_uuid, expected)) return false; - uuid = parseFromString(ast_uuid->as()->value.get()); + uuid = parseFromString(ast_uuid->as()->value.safeGet()); } if (parts.size() == 1) node = std::make_shared(parts[0], std::move(params)); @@ -769,9 +853,10 @@ bool ParserCastOperator::parseImpl(Pos & pos, ASTPtr & node, Expected & expected /// Parse numbers (including decimals), strings, arrays and tuples of them. + Pos begin = pos; const char * data_begin = pos->begin; const char * data_end = pos->end; - bool is_string_literal = pos->type == StringLiteral; + ASTPtr string_literal; if (pos->type == Minus) { @@ -782,10 +867,15 @@ bool ParserCastOperator::parseImpl(Pos & pos, ASTPtr & node, Expected & expected data_end = pos->end; ++pos; } - else if (pos->type == Number || is_string_literal) + else if (pos->type == Number) { ++pos; } + else if (pos->type == StringLiteral) + { + if (!ParserStringLiteral().parse(begin, string_literal, expected)) + return false; + } else if (isOneOf(pos->type)) { TokenType last_token = OpeningSquareBracket; @@ -853,20 +943,18 @@ bool ParserCastOperator::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (ParserToken(DoubleColon).ignore(pos, expected) && ParserDataType().parse(pos, type_ast, expected)) { - String s; size_t data_size = data_end - data_begin; - if (is_string_literal) + if (string_literal) { - ReadBufferFromMemory buf(data_begin, data_size); - readQuotedStringWithSQLStyle(s, buf); - assert(buf.count() == data_size); + node = createFunctionCast(string_literal, type_ast); + return true; } else - s = String(data_begin, data_size); - - auto literal = std::make_shared(std::move(s)); - node = createFunctionCast(literal, type_ast); - return true; + { + auto literal = std::make_shared(String(data_begin, data_size)); + node = createFunctionCast(literal, type_ast); + return true; + } } return false; @@ -1626,7 +1714,7 @@ bool ParserColumnsTransformers::parseImpl(Pos & pos, ASTPtr & node, Expected & e if (!parser_string_literal.parse(pos, ast_prefix_name, expected)) return false; - column_name_prefix = ast_prefix_name->as().value.get(); + column_name_prefix = ast_prefix_name->as().value.safeGet(); } if (with_open_round_bracket) @@ -1689,7 +1777,7 @@ bool ParserColumnsTransformers::parseImpl(Pos & pos, ASTPtr & node, Expected & e auto res = std::make_shared(); if (regexp_node) - res->setPattern(regexp_node->as().value.get()); + res->setPattern(regexp_node->as().value.safeGet()); else res->children = std::move(identifiers); res->is_strict = is_strict; @@ -1861,7 +1949,7 @@ static bool parseColumnsMatcherBody(IParser::Pos & pos, ASTPtr & node, Expected else { auto regexp_matcher = std::make_shared(); - regexp_matcher->setPattern(regexp_node->as().value.get()); + regexp_matcher->setPattern(regexp_node->as().value.safeGet()); if (!transformers->children.empty()) { @@ -2310,7 +2398,7 @@ bool ParserTTLElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!parser_string_literal.parse(pos, ast_space_name, expected)) return false; - destination_name = ast_space_name->as().value.get(); + destination_name = ast_space_name->as().value.safeGet(); } else if (mode == TTLMode::GROUP_BY) { diff --git a/src/Parsers/ExpressionElementParsers.h b/src/Parsers/ExpressionElementParsers.h index 0209e785bff..903111f32db 100644 --- a/src/Parsers/ExpressionElementParsers.h +++ b/src/Parsers/ExpressionElementParsers.h @@ -52,11 +52,22 @@ protected: /** An identifier, possibly containing a dot, for example, x_yz123 or `something special` or Hits.EventTime, - * possibly with UUID clause like `db name`.`table name` UUID 'xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx' + * possibly with UUID clause like `db name`.`table name` UUID 'xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx'. + * There is also special delimiters `.:` and `.^` for JSON type subcolumns. In case of special delimiter + * the next identifier part after it will include special delimiter and be back quoted always: json.a.b.:UInt32 -> ['json', 'a', 'b', ':`UInt32`']. + * It's needed to distinguish identifiers json.a.b.:UInt32 and json.a.b.`:UInt32`. + * There is also a special syntax sugar for reading JSON subcolumns of type Array(JSON): json.a.b[][].c -> json.a.b.:Array(Array(JSON)).c */ class ParserCompoundIdentifier : public IParserBase { public: + enum class SpecialDelimiter : char + { + NONE = '\0', + JSON_PATH_DYNAMIC_TYPE = ':', + JSON_PATH_PREFIX = '^', + }; + explicit ParserCompoundIdentifier(bool table_name_with_optional_uuid_ = false, bool allow_query_parameter_ = false, Highlight highlight_type_ = Highlight::identifier) : table_name_with_optional_uuid(table_name_with_optional_uuid_), allow_query_parameter(allow_query_parameter_), highlight_type(highlight_type_) { diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index d38dc6d5f37..ad6b8e13ea6 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -2811,8 +2811,8 @@ Action ParserExpressionImpl::tryParseOperator(Layers & layers, IParser::Pos & po if (op.type == OperatorType::TupleElement) { ASTPtr tmp; - if (asterisk_parser.parse(pos, tmp, expected) || - columns_matcher_parser.parse(pos, tmp, expected)) + if (asterisk_parser.parse(pos, tmp, expected) + || columns_matcher_parser.parse(pos, tmp, expected)) { if (auto * asterisk = tmp->as()) { @@ -2833,6 +2833,17 @@ Action ParserExpressionImpl::tryParseOperator(Layers & layers, IParser::Pos & po layers.back()->pushOperand(std::move(tmp)); return Action::OPERATOR; } + + /// If it is an identifier, + /// replace it with literal, because an expression `expr().elem` + /// should be transformed to `tupleElement(expr(), 'elem')` for query analysis, + /// otherwise the identifier `elem` will not be found. + if (ParserIdentifier().parse(pos, tmp, expected)) + { + layers.back()->pushOperator(op); + layers.back()->pushOperand(std::make_shared(tmp->as()->name())); + return Action::OPERATOR; + } } /// isNull & isNotNull are postfix unary operators @@ -2863,7 +2874,7 @@ Action ParserExpressionImpl::tryParseOperator(Layers & layers, IParser::Pos & po layers.push_back(std::make_unique()); if (op.type == OperatorType::StartBetween || op.type == OperatorType::StartNotBetween) - layers.back()->between_counter++; + ++layers.back()->between_counter; return Action::OPERAND; } diff --git a/src/Parsers/FieldFromAST.cpp b/src/Parsers/FieldFromAST.cpp index ad1eab49eeb..64aeae1b570 100644 --- a/src/Parsers/FieldFromAST.cpp +++ b/src/Parsers/FieldFromAST.cpp @@ -1,5 +1,4 @@ #include -#include #include #include #include diff --git a/src/Parsers/IAST.cpp b/src/Parsers/IAST.cpp index 37d7f458d61..5bd2c92c60a 100644 --- a/src/Parsers/IAST.cpp +++ b/src/Parsers/IAST.cpp @@ -165,11 +165,12 @@ size_t IAST::checkDepthImpl(size_t max_depth) const return res; } -String IAST::formatWithPossiblyHidingSensitiveData(size_t max_length, bool one_line, bool show_secrets) const +String IAST::formatWithPossiblyHidingSensitiveData(size_t max_length, bool one_line, bool show_secrets, bool print_pretty_type_names) const { WriteBufferFromOwnString buf; FormatSettings settings(buf, one_line); settings.show_secrets = show_secrets; + settings.print_pretty_type_names = print_pretty_type_names; format(settings); return wipeSensitiveDataAndCutToLength(buf.str(), max_length); } diff --git a/src/Parsers/IAST.h b/src/Parsers/IAST.h index e2cf7579667..2293d50b0ec 100644 --- a/src/Parsers/IAST.h +++ b/src/Parsers/IAST.h @@ -201,6 +201,7 @@ public: bool show_secrets; /// Show secret parts of the AST (e.g. passwords, encryption keys). char nl_or_ws; /// Newline or whitespace. LiteralEscapingStyle literal_escaping_style; + bool print_pretty_type_names; explicit FormatSettings( WriteBuffer & ostr_, @@ -209,7 +210,8 @@ public: bool always_quote_identifiers_ = false, IdentifierQuotingStyle identifier_quoting_style_ = IdentifierQuotingStyle::Backticks, bool show_secrets_ = true, - LiteralEscapingStyle literal_escaping_style_ = LiteralEscapingStyle::Regular) + LiteralEscapingStyle literal_escaping_style_ = LiteralEscapingStyle::Regular, + bool print_pretty_type_names_ = false) : ostr(ostr_) , one_line(one_line_) , hilite(hilite_) @@ -218,6 +220,7 @@ public: , show_secrets(show_secrets_) , nl_or_ws(one_line ? ' ' : '\n') , literal_escaping_style(literal_escaping_style_) + , print_pretty_type_names(print_pretty_type_names_) { } @@ -230,6 +233,7 @@ public: , show_secrets(other.show_secrets) , nl_or_ws(other.nl_or_ws) , literal_escaping_style(other.literal_escaping_style) + , print_pretty_type_names(other.print_pretty_type_names) { } @@ -251,7 +255,7 @@ public: /// The state that is copied when each node is formatted. For example, nesting level. struct FormatStateStacked { - UInt8 indent = 0; + UInt16 indent = 0; bool need_parens = false; bool expression_list_always_start_on_new_line = false; /// Line feed and indent before expression list even if it's of single element. bool expression_list_prepend_whitespace = false; /// Prepend whitespace (if it is required) @@ -274,7 +278,7 @@ public: /// Secrets are displayed regarding show_secrets, then SensitiveDataMasker is applied. /// You can use Interpreters/formatWithPossiblyHidingSecrets.h for convenience. - String formatWithPossiblyHidingSensitiveData(size_t max_length, bool one_line, bool show_secrets) const; + String formatWithPossiblyHidingSensitiveData(size_t max_length, bool one_line, bool show_secrets, bool print_pretty_type_names) const; /** formatForLogging and formatForErrorMessage always hide secrets. This inconsistent * behaviour is due to the fact such functions are called from Client which knows nothing about @@ -283,12 +287,12 @@ public: */ String formatForLogging(size_t max_length = 0) const { - return formatWithPossiblyHidingSensitiveData(max_length, true, false); + return formatWithPossiblyHidingSensitiveData(max_length, true, false, false); } String formatForErrorMessage() const { - return formatWithPossiblyHidingSensitiveData(0, true, false); + return formatWithPossiblyHidingSensitiveData(0, true, false, false); } virtual bool hasSecretParts() const { return childrenHaveSecretParts(); } diff --git a/src/Parsers/Lexer.cpp b/src/Parsers/Lexer.cpp index b4601389696..43c4ab867d1 100644 --- a/src/Parsers/Lexer.cpp +++ b/src/Parsers/Lexer.cpp @@ -423,6 +423,8 @@ Token Lexer::nextTokenImpl() } case '?': return Token(TokenType::QuestionMark, token_begin, ++pos); + case '^': + return Token(TokenType::Caret, token_begin, ++pos); case ':': { ++pos; diff --git a/src/Parsers/Lexer.h b/src/Parsers/Lexer.h index 6f31d56292d..9dc0850abfd 100644 --- a/src/Parsers/Lexer.h +++ b/src/Parsers/Lexer.h @@ -45,6 +45,7 @@ namespace DB M(Arrow) /** ->. Should be distinguished from minus operator. */ \ M(QuestionMark) \ M(Colon) \ + M(Caret) \ M(DoubleColon) \ M(Equals) \ M(NotEquals) \ diff --git a/src/Parsers/ParserAlterQuery.cpp b/src/Parsers/ParserAlterQuery.cpp index dbefb0cb966..73fd563faf6 100644 --- a/src/Parsers/ParserAlterQuery.cpp +++ b/src/Parsers/ParserAlterQuery.cpp @@ -517,7 +517,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (!parser_string_literal.parse(pos, ast_space_name, expected)) return false; - command->move_destination_name = ast_space_name->as().value.get(); + command->move_destination_name = ast_space_name->as().value.safeGet(); } else if (s_move_partition.ignore(pos, expected)) { @@ -545,7 +545,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (!parser_string_literal.parse(pos, ast_space_name, expected)) return false; - command->move_destination_name = ast_space_name->as().value.get(); + command->move_destination_name = ast_space_name->as().value.safeGet(); } } else if (s_add_constraint.ignore(pos, expected)) @@ -638,7 +638,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (!parser_string_literal.parse(pos, ast_from, expected)) return false; - command->from = ast_from->as().value.get(); + command->from = ast_from->as().value.safeGet(); command->type = ASTAlterCommand::FETCH_PARTITION; } else if (s_fetch_part.ignore(pos, expected)) @@ -652,7 +652,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ASTPtr ast_from; if (!parser_string_literal.parse(pos, ast_from, expected)) return false; - command->from = ast_from->as().value.get(); + command->from = ast_from->as().value.safeGet(); command->part = true; command->type = ASTAlterCommand::FETCH_PARTITION; } @@ -680,7 +680,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (!parser_string_literal.parse(pos, ast_with_name, expected)) return false; - command->with_name = ast_with_name->as().value.get(); + command->with_name = ast_with_name->as().value.safeGet(); } } else if (s_unfreeze.ignore(pos, expected)) @@ -707,7 +707,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (!parser_string_literal.parse(pos, ast_with_name, expected)) return false; - command->with_name = ast_with_name->as().value.get(); + command->with_name = ast_with_name->as().value.safeGet(); } else { diff --git a/src/Parsers/ParserCheckQuery.cpp b/src/Parsers/ParserCheckQuery.cpp index 42716ba7f2c..33b6a5a1ac2 100644 --- a/src/Parsers/ParserCheckQuery.cpp +++ b/src/Parsers/ParserCheckQuery.cpp @@ -55,7 +55,7 @@ bool ParserCheckQuery::parseCheckTable(Pos & pos, ASTPtr & node, Expected & expe const auto * ast_literal = ast_part_name->as(); if (!ast_literal || ast_literal->value.getType() != Field::Types::String) return false; - query->part_name = ast_literal->value.get(); + query->part_name = ast_literal->value.safeGet(); } if (query->database) diff --git a/src/Parsers/ParserCreateIndexQuery.cpp b/src/Parsers/ParserCreateIndexQuery.cpp index 9ebee4cc852..ed89b80edca 100644 --- a/src/Parsers/ParserCreateIndexQuery.cpp +++ b/src/Parsers/ParserCreateIndexQuery.cpp @@ -89,10 +89,8 @@ bool ParserCreateIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected else { auto index_type = index->getType(); - if (index_type && index_type->name == "annoy") - index->granularity = ASTIndexDeclaration::DEFAULT_ANNOY_INDEX_GRANULARITY; - else if (index_type && index_type->name == "usearch") - index->granularity = ASTIndexDeclaration::DEFAULT_USEARCH_INDEX_GRANULARITY; + if (index_type && index_type->name == "vector_similarity") + index->granularity = ASTIndexDeclaration::DEFAULT_VECTOR_SIMILARITY_INDEX_GRANULARITY; else index->granularity = ASTIndexDeclaration::DEFAULT_INDEX_GRANULARITY; } diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index a592975613b..31dc2075db4 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -53,40 +53,6 @@ ASTPtr parseComment(IParser::Pos & pos, Expected & expected) } - -bool ParserNestedTable::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - ParserToken open(TokenType::OpeningRoundBracket); - ParserToken close(TokenType::ClosingRoundBracket); - ParserIdentifier name_p; - ParserNameTypePairList columns_p; - - ASTPtr name; - ASTPtr columns; - - /// For now `name == 'Nested'`, probably alternative nested data structures will appear - if (!name_p.parse(pos, name, expected)) - return false; - - if (!open.ignore(pos, expected)) - return false; - - if (!columns_p.parse(pos, columns, expected)) - return false; - - if (!close.ignore(pos, expected)) - return false; - - auto func = std::make_shared(); - tryGetIdentifierNameInto(name, func->name); - - func->arguments = columns; - func->children.push_back(columns); - node = func; - - return true; -} - bool ParserSQLSecurity::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserToken s_eq(TokenType::Equals); @@ -214,10 +180,8 @@ bool ParserIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expe else { auto index_type = index->getType(); - if (index_type->name == "annoy") - index->granularity = ASTIndexDeclaration::DEFAULT_ANNOY_INDEX_GRANULARITY; - else if (index_type->name == "usearch") - index->granularity = ASTIndexDeclaration::DEFAULT_USEARCH_INDEX_GRANULARITY; + if (index_type->name == "vector_similarity") + index->granularity = ASTIndexDeclaration::DEFAULT_VECTOR_SIMILARITY_INDEX_GRANULARITY; else index->granularity = ASTIndexDeclaration::DEFAULT_INDEX_GRANULARITY; } @@ -696,6 +660,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe ASTPtr table; ASTPtr columns_list; std::shared_ptr storage; + bool is_time_series_table = false; ASTPtr targets; ASTPtr as_database; ASTPtr as_table; @@ -784,6 +749,13 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe return false; storage = typeid_cast>(ast); + + if (storage && storage->engine && (storage->engine->name == "TimeSeries")) + { + is_time_series_table = true; + ParserViewTargets({ViewTarget::Data, ViewTarget::Tags, ViewTarget::Metrics}).parse(pos, targets, expected); + } + return true; }; @@ -873,6 +845,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe query->create_or_replace = or_replace; query->if_not_exists = if_not_exists; query->temporary = is_temporary; + query->is_time_series_table = is_time_series_table; query->database = table_id->getDatabase(); query->table = table_id->getTable(); @@ -922,7 +895,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe query->is_create_empty = is_create_empty; if (from_path) - query->attach_from_path = from_path->as().value.get(); + query->attach_from_path = from_path->as().value.safeGet(); return true; } @@ -1431,7 +1404,7 @@ bool ParserCreateDatabaseQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e ASTPtr ast_uuid; if (!uuid_p.parse(pos, ast_uuid, expected)) return false; - uuid = parseFromString(ast_uuid->as()->value.get()); + uuid = parseFromString(ast_uuid->as()->value.safeGet()); } if (s_on.ignore(pos, expected)) diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index 53a62deb22b..82da2e7ea0b 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -18,15 +18,6 @@ namespace DB { -/** A nested table. For example, Nested(UInt32 CounterID, FixedString(2) UserAgentMajor) - */ -class ParserNestedTable : public IParserBase -{ -protected: - const char * getName() const override { return "nested table"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - /** Parses sql security option. DEFINER = user_name SQL SECURITY DEFINER */ class ParserSQLSecurity : public IParserBase diff --git a/src/Parsers/ParserDataType.cpp b/src/Parsers/ParserDataType.cpp index 2edb0141e12..d86b659df90 100644 --- a/src/Parsers/ParserDataType.cpp +++ b/src/Parsers/ParserDataType.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -16,8 +17,8 @@ namespace DB namespace { -/// Parser of Dynamic type arguments: Dynamic(max_types=N) -class DynamicArgumentsParser : public IParserBase +/// Parser of Dynamic type argument: Dynamic(max_types=N) +class DynamicArgumentParser : public IParserBase { private: const char * getName() const override { return "Dynamic data type optional argument"; } @@ -46,14 +47,84 @@ private: } }; +/// Parser of Object type argument. For example: JSON(some_parameter=N, some.path SomeType, SKIP skip.path, ...) +class ObjectArgumentParser : public IParserBase +{ +private: + const char * getName() const override { return "JSON data type optional argument"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override + { + auto argument = std::make_shared(); + + /// SKIP arguments + if (ParserKeyword(Keyword::SKIP).ignore(pos)) + { + /// SKIP REGEXP '' + if (ParserKeyword(Keyword::REGEXP).ignore(pos)) + { + ParserStringLiteral literal_parser; + ASTPtr literal; + if (!literal_parser.parse(pos, literal, expected)) + return false; + argument->skip_path_regexp = literal; + argument->children.push_back(argument->skip_path_regexp); + } + /// SKIP some.path + else + { + ParserCompoundIdentifier compound_identifier_parser; + ASTPtr compound_identifier; + if (!compound_identifier_parser.parse(pos, compound_identifier, expected)) + return false; + + argument->skip_path = compound_identifier; + argument->children.push_back(argument->skip_path); + } + + node = argument; + return true; + } + + ParserCompoundIdentifier compound_identifier_parser; + ASTPtr identifier; + if (!compound_identifier_parser.parse(pos, identifier, expected)) + return false; + + /// some_parameter=N + if (pos->type == TokenType::Equals) + { + ++pos; + ASTPtr number; + ParserNumber number_parser; + if (!number_parser.parse(pos, number, expected)) + return false; + + argument->parameter = makeASTFunction("equals", identifier, number); + argument->children.push_back(argument->parameter); + node = argument; + return true; + } + + ParserDataType type_parser; + ASTPtr type; + if (!type_parser.parse(pos, type, expected)) + return false; + + auto name_and_type = std::make_shared(); + name_and_type->name = getIdentifierName(identifier); + name_and_type->type = type; + name_and_type->children.push_back(name_and_type->type); + argument->path_with_type = name_and_type; + argument->children.push_back(argument->path_with_type); + node = argument; + return true; + } +}; + } bool ParserDataType::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - ParserNestedTable nested; - if (nested.parse(pos, node, expected)) - return true; - String type_name; ParserIdentifier name_parser; @@ -171,10 +242,12 @@ bool ParserDataType::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) /// Allow mixed lists of nested and normal types. /// Parameters are either: - /// - Nested table elements; + /// - Nested table element; + /// - Tuple element /// - Enum element in form of 'a' = 1; /// - literal; - /// - Dynamic type arguments; + /// - Dynamic type argument; + /// - JSON type argument; /// - another data type (or identifier); size_t arg_num = 0; @@ -192,13 +265,24 @@ bool ParserDataType::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ASTPtr arg; if (type_name == "Dynamic") { - DynamicArgumentsParser parser; + DynamicArgumentParser parser; + parser.parse(pos, arg, expected); + } + else if (type_name == "JSON") + { + ObjectArgumentParser parser; parser.parse(pos, arg, expected); } else if (type_name == "Nested") { - ParserNestedTable nested_parser; - nested_parser.parse(pos, arg, expected); + ParserNameTypePair name_and_type_parser; + name_and_type_parser.parse(pos, arg, expected); + } + else if (type_name == "Tuple") + { + ParserNameTypePair name_and_type_parser; + ParserDataType only_type_parser; + name_and_type_parser.parse(pos, arg, expected) || only_type_parser.parse(pos, arg, expected); } else if (type_name == "AggregateFunction" || type_name == "SimpleAggregateFunction") { @@ -252,9 +336,6 @@ bool ParserDataType::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ++arg_num; } - if (pos->type == TokenType::Comma) - // ignore trailing comma inside Nested structures like Tuple(Int, Tuple(Int, String),) - ++pos; if (pos->type != TokenType::ClosingRoundBracket) return false; ++pos; diff --git a/src/Parsers/ParserDictionary.cpp b/src/Parsers/ParserDictionary.cpp index 83a006231d9..ce38d1b54d1 100644 --- a/src/Parsers/ParserDictionary.cpp +++ b/src/Parsers/ParserDictionary.cpp @@ -33,7 +33,7 @@ bool ParserDictionaryLifetime::parseImpl(Pos & pos, ASTPtr & node, Expected & ex if (literal.value.getType() != Field::Types::UInt64) return false; - res->max_sec = literal.value.get(); + res->max_sec = literal.value.safeGet(); node = res; return true; } @@ -58,10 +58,10 @@ bool ParserDictionaryLifetime::parseImpl(Pos & pos, ASTPtr & node, Expected & ex return false; if (pair.first == "min") - res->min_sec = literal->value.get(); + res->min_sec = literal->value.safeGet(); else if (pair.first == "max") { - res->max_sec = literal->value.get(); + res->max_sec = literal->value.safeGet(); initialized_max = true; } else diff --git a/src/Parsers/ParserPartition.cpp b/src/Parsers/ParserPartition.cpp index 80a28f4803e..ab97b3d0e3b 100644 --- a/src/Parsers/ParserPartition.cpp +++ b/src/Parsers/ParserPartition.cpp @@ -65,7 +65,7 @@ bool ParserPartition::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { if (literal_ast->value.getType() == Field::Types::Tuple) { - fields_count = literal_ast->value.get().size(); + fields_count = literal_ast->value.safeGet().size(); } else { diff --git a/src/Parsers/ParserRefreshStrategy.cpp b/src/Parsers/ParserRefreshStrategy.cpp index e7912293d85..4f3b7c66558 100644 --- a/src/Parsers/ParserRefreshStrategy.cpp +++ b/src/Parsers/ParserRefreshStrategy.cpp @@ -96,6 +96,10 @@ bool ParserRefreshStrategy::parseImpl(Pos & pos, ASTPtr & node, Expected & expec return false; refresh->set(refresh->settings, settings); } + + if (ParserKeyword{Keyword::APPEND}.ignore(pos, expected)) + refresh->append = true; + node = refresh; return true; } diff --git a/src/Parsers/ParserSystemQuery.cpp b/src/Parsers/ParserSystemQuery.cpp index 0545c3e5568..af84dd10bfa 100644 --- a/src/Parsers/ParserSystemQuery.cpp +++ b/src/Parsers/ParserSystemQuery.cpp @@ -421,6 +421,7 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & break; case Type::REFRESH_VIEW: + case Type::WAIT_VIEW: case Type::START_VIEW: case Type::STOP_VIEW: case Type::CANCEL_VIEW: @@ -445,7 +446,7 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & ASTPtr ast; if (!ParserStringLiteral{}.parse(pos, ast, expected)) return false; - String time_str = ast->as().value.get(); + String time_str = ast->as().value.safeGet(); ReadBufferFromString buf(time_str); time_t time; readDateTimeText(time, buf); @@ -467,7 +468,17 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & return false; } - res->seconds = seconds->as()->value.get(); + res->seconds = seconds->as()->value.safeGet(); + break; + } + case Type::DROP_QUERY_CACHE: + { + ParserLiteral tag_parser; + ASTPtr ast; + if (ParserKeyword{Keyword::TAG}.ignore(pos, expected) && tag_parser.parse(pos, ast, expected)) + res->query_cache_tag = std::make_optional(ast->as()->value.safeGet()); + if (!parseQueryWithOnCluster(res, pos, expected)) + return false; break; } case Type::DROP_FILESYSTEM_CACHE: @@ -538,7 +549,7 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & ASTPtr ast; if (ParserKeyword{Keyword::WITH_NAME}.ignore(pos, expected) && ParserStringLiteral{}.parse(pos, ast, expected)) { - res->backup_name = ast->as().value.get(); + res->backup_name = ast->as().value.safeGet(); } else { @@ -577,7 +588,7 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & if (!ParserStringLiteral{}.parse(pos, ast, expected)) return false; - custom_name = ast->as().value.get(); + custom_name = ast->as().value.safeGet(); } return true; diff --git a/src/Parsers/ParserUndropQuery.cpp b/src/Parsers/ParserUndropQuery.cpp index 07ca8a3b5fd..57da47df70d 100644 --- a/src/Parsers/ParserUndropQuery.cpp +++ b/src/Parsers/ParserUndropQuery.cpp @@ -41,7 +41,7 @@ bool parseUndropQuery(IParser::Pos & pos, ASTPtr & node, Expected & expected) ASTPtr ast_uuid; if (!uuid_p.parse(pos, ast_uuid, expected)) return false; - uuid = parseFromString(ast_uuid->as()->value.get()); + uuid = parseFromString(ast_uuid->as()->value.safeGet()); } if (ParserKeyword{Keyword::ON}.ignore(pos, expected)) { diff --git a/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt b/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt index 20fd951d390..74fdcff79f7 100644 --- a/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt +++ b/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt @@ -39,7 +39,7 @@ set(CMAKE_INCLUDE_CURRENT_DIR TRUE) clickhouse_add_executable(codegen_select_fuzzer ${FUZZER_SRCS}) -set_source_files_properties("${PROTO_SRCS}" "out.cpp" PROPERTIES COMPILE_FLAGS "-Wno-reserved-identifier") +set_source_files_properties("${PROTO_SRCS}" "out.cpp" PROPERTIES COMPILE_FLAGS "-Wno-reserved-identifier -Wno-extra-semi-stmt -Wno-used-but-marked-unused") # contrib/libprotobuf-mutator/src/libfuzzer/libfuzzer_macro.h:143:44: error: no newline at end of file [-Werror,-Wnewline-eof] target_compile_options (codegen_select_fuzzer PRIVATE -Wno-newline-eof) diff --git a/src/Parsers/tests/gtest_dictionary_parser.cpp b/src/Parsers/tests/gtest_dictionary_parser.cpp index a1ba46125a7..af3591750a1 100644 --- a/src/Parsers/tests/gtest_dictionary_parser.cpp +++ b/src/Parsers/tests/gtest_dictionary_parser.cpp @@ -56,21 +56,21 @@ TEST(ParserDictionaryDDL, SimpleDictionary) EXPECT_EQ(create->dictionary->source->name, "clickhouse"); auto children = create->dictionary->source->elements->children; EXPECT_EQ(children[0]->as() -> first, "host"); - EXPECT_EQ(children[0]->as()->second->as()->value.get(), "localhost"); + EXPECT_EQ(children[0]->as()->second->as()->value.safeGet(), "localhost"); EXPECT_EQ(children[1]->as()->first, "port"); - EXPECT_EQ(children[1]->as()->second->as()->value.get(), 9000); + EXPECT_EQ(children[1]->as()->second->as()->value.safeGet(), 9000); EXPECT_EQ(children[2]->as()->first, "user"); - EXPECT_EQ(children[2]->as()->second->as()->value.get(), "default"); + EXPECT_EQ(children[2]->as()->second->as()->value.safeGet(), "default"); EXPECT_EQ(children[3]->as()->first, "password"); - EXPECT_EQ(children[3]->as()->second->as()->value.get(), ""); + EXPECT_EQ(children[3]->as()->second->as()->value.safeGet(), ""); EXPECT_EQ(children[4]->as()->first, "db"); - EXPECT_EQ(children[4]->as()->second->as()->value.get(), "test"); + EXPECT_EQ(children[4]->as()->second->as()->value.safeGet(), "test"); EXPECT_EQ(children[5]->as()->first, "table"); - EXPECT_EQ(children[5]->as()->second->as()->value.get(), "table_for_dict"); + EXPECT_EQ(children[5]->as()->second->as()->value.safeGet(), "table_for_dict"); /// layout test auto * layout = create->dictionary->layout; @@ -102,9 +102,9 @@ TEST(ParserDictionaryDDL, SimpleDictionary) EXPECT_EQ(attributes_children[1]->as()->name, "second_column"); EXPECT_EQ(attributes_children[2]->as()->name, "third_column"); - EXPECT_EQ(attributes_children[0]->as()->default_value->as()->value.get(), 0); - EXPECT_EQ(attributes_children[1]->as()->default_value->as()->value.get(), 1); - EXPECT_EQ(attributes_children[2]->as()->default_value->as()->value.get(), 2); + EXPECT_EQ(attributes_children[0]->as()->default_value->as()->value.safeGet(), 0); + EXPECT_EQ(attributes_children[1]->as()->default_value->as()->value.safeGet(), 1); + EXPECT_EQ(attributes_children[2]->as()->default_value->as()->value.safeGet(), 2); EXPECT_EQ(attributes_children[0]->as()->expression, nullptr); EXPECT_EQ(attributes_children[1]->as()->expression, nullptr); @@ -150,8 +150,8 @@ TEST(ParserDictionaryDDL, AttributesWithMultipleProperties) EXPECT_EQ(attributes_children[2]->as()->name, "third_column"); EXPECT_EQ(attributes_children[0]->as()->default_value, nullptr); - EXPECT_EQ(attributes_children[1]->as()->default_value->as()->value.get(), 1); - EXPECT_EQ(attributes_children[2]->as()->default_value->as()->value.get(), 2); + EXPECT_EQ(attributes_children[1]->as()->default_value->as()->value.safeGet(), 1); + EXPECT_EQ(attributes_children[2]->as()->default_value->as()->value.safeGet(), 2); EXPECT_EQ(attributes_children[0]->as()->expression, nullptr); EXPECT_EQ(attributes_children[1]->as()->expression, nullptr); @@ -195,9 +195,9 @@ TEST(ParserDictionaryDDL, CustomAttributePropertiesOrder) EXPECT_EQ(attributes_children[1]->as()->name, "second_column"); EXPECT_EQ(attributes_children[2]->as()->name, "third_column"); - EXPECT_EQ(attributes_children[0]->as()->default_value->as()->value.get(), 100); - EXPECT_EQ(attributes_children[1]->as()->default_value->as()->value.get(), 1); - EXPECT_EQ(attributes_children[2]->as()->default_value->as()->value.get(), 2); + EXPECT_EQ(attributes_children[0]->as()->default_value->as()->value.safeGet(), 100); + EXPECT_EQ(attributes_children[1]->as()->default_value->as()->value.safeGet(), 1); + EXPECT_EQ(attributes_children[2]->as()->default_value->as()->value.safeGet(), 2); EXPECT_EQ(attributes_children[0]->as()->expression, nullptr); EXPECT_EQ(attributes_children[1]->as()->expression, nullptr); @@ -248,25 +248,25 @@ TEST(ParserDictionaryDDL, NestedSource) auto children = create->dictionary->source->elements->children; EXPECT_EQ(children[0]->as()->first, "host"); - EXPECT_EQ(children[0]->as()->second->as()->value.get(), "localhost"); + EXPECT_EQ(children[0]->as()->second->as()->value.safeGet(), "localhost"); EXPECT_EQ(children[1]->as()->first, "port"); - EXPECT_EQ(children[1]->as()->second->as()->value.get(), 9000); + EXPECT_EQ(children[1]->as()->second->as()->value.safeGet(), 9000); EXPECT_EQ(children[2]->as()->first, "user"); - EXPECT_EQ(children[2]->as()->second->as()->value.get(), "default"); + EXPECT_EQ(children[2]->as()->second->as()->value.safeGet(), "default"); EXPECT_EQ(children[3]->as()->first, "replica"); auto replica = children[3]->as()->second->children; EXPECT_EQ(replica[0]->as()->first, "host"); - EXPECT_EQ(replica[0]->as()->second->as()->value.get(), "127.0.0.1"); + EXPECT_EQ(replica[0]->as()->second->as()->value.safeGet(), "127.0.0.1"); EXPECT_EQ(replica[1]->as()->first, "priority"); - EXPECT_EQ(replica[1]->as()->second->as()->value.get(), 1); + EXPECT_EQ(replica[1]->as()->second->as()->value.safeGet(), 1); EXPECT_EQ(children[4]->as()->first, "password"); - EXPECT_EQ(children[4]->as()->second->as()->value.get(), ""); + EXPECT_EQ(children[4]->as()->second->as()->value.safeGet(), ""); } diff --git a/src/Processors/Executors/ExecutingGraph.cpp b/src/Processors/Executors/ExecutingGraph.cpp index 6d5b60d8159..f2927d4145c 100644 --- a/src/Processors/Executors/ExecutingGraph.cpp +++ b/src/Processors/Executors/ExecutingGraph.cpp @@ -96,7 +96,7 @@ bool ExecutingGraph::addEdges(uint64_t node) return was_edge_added; } -bool ExecutingGraph::expandPipeline(std::stack & stack, uint64_t pid) +ExecutingGraph::UpdateNodeStatus ExecutingGraph::expandPipeline(std::stack & stack, uint64_t pid) { auto & cur_node = *nodes[pid]; Processors new_processors; @@ -108,7 +108,7 @@ bool ExecutingGraph::expandPipeline(std::stack & stack, uint64_t pid) catch (...) { cur_node.exception = std::current_exception(); - return false; + return UpdateNodeStatus::Exception; } { @@ -118,7 +118,7 @@ bool ExecutingGraph::expandPipeline(std::stack & stack, uint64_t pid) { for (auto & processor : new_processors) processor->cancel(); - return false; + return UpdateNodeStatus::Cancelled; } processors->insert(processors->end(), new_processors.begin(), new_processors.end()); @@ -178,7 +178,7 @@ bool ExecutingGraph::expandPipeline(std::stack & stack, uint64_t pid) } } - return true; + return UpdateNodeStatus::Done; } void ExecutingGraph::initializeExecution(Queue & queue) @@ -213,7 +213,7 @@ void ExecutingGraph::initializeExecution(Queue & queue) } -bool ExecutingGraph::updateNode(uint64_t pid, Queue & queue, Queue & async_queue) +ExecutingGraph::UpdateNodeStatus ExecutingGraph::updateNode(uint64_t pid, Queue & queue, Queue & async_queue) { std::stack updated_edges; std::stack updated_processors; @@ -279,7 +279,7 @@ bool ExecutingGraph::updateNode(uint64_t pid, Queue & queue, Queue & async_queue try { auto & processor = *node.processor; - IProcessor::Status last_status = node.last_processor_status; + const auto last_status = node.last_processor_status; IProcessor::Status status = processor.prepare(node.updated_input_ports, node.updated_output_ports); node.last_processor_status = status; @@ -309,7 +309,7 @@ bool ExecutingGraph::updateNode(uint64_t pid, Queue & queue, Queue & async_queue catch (...) { node.exception = std::current_exception(); - return false; + return UpdateNodeStatus::Exception; } #ifndef NDEBUG @@ -319,7 +319,7 @@ bool ExecutingGraph::updateNode(uint64_t pid, Queue & queue, Queue & async_queue node.updated_input_ports.clear(); node.updated_output_ports.clear(); - switch (node.last_processor_status) + switch (*node.last_processor_status) { case IProcessor::Status::NeedData: case IProcessor::Status::PortFull: @@ -386,8 +386,9 @@ bool ExecutingGraph::updateNode(uint64_t pid, Queue & queue, Queue & async_queue read_lock.unlock(); { std::unique_lock lock(nodes_mutex); - if (!expandPipeline(updated_processors, pid)) - return false; + auto status = expandPipeline(updated_processors, pid); + if (status != UpdateNodeStatus::Done) + return status; } read_lock.lock(); @@ -397,7 +398,7 @@ bool ExecutingGraph::updateNode(uint64_t pid, Queue & queue, Queue & async_queue } } - return true; + return UpdateNodeStatus::Done; } void ExecutingGraph::cancel(bool cancel_all_processors) diff --git a/src/Processors/Executors/ExecutingGraph.h b/src/Processors/Executors/ExecutingGraph.h index 71dcd360a2c..8d8ba722b3e 100644 --- a/src/Processors/Executors/ExecutingGraph.h +++ b/src/Processors/Executors/ExecutingGraph.h @@ -92,7 +92,7 @@ public: std::exception_ptr exception; /// Last state for profiling. - IProcessor::Status last_processor_status = IProcessor::Status::NeedData; + std::optional last_processor_status; /// Ports which have changed their state since last processor->prepare() call. /// They changed when neighbour processors interact with connected ports. @@ -138,10 +138,17 @@ public: /// Traverse graph the first time to update all the childless nodes. void initializeExecution(Queue & queue); + enum class UpdateNodeStatus + { + Done, + Exception, + Cancelled, + }; + /// Update processor with pid number (call IProcessor::prepare). /// Check parents and children of current processor and push them to stacks if they also need to be updated. /// If processor wants to be expanded, lock will be upgraded to get write access to pipeline. - bool updateNode(uint64_t pid, Queue & queue, Queue & async_queue); + UpdateNodeStatus updateNode(uint64_t pid, Queue & queue, Queue & async_queue); void cancel(bool cancel_all_processors = true); @@ -155,7 +162,7 @@ private: /// Update graph after processor (pid) returned ExpandPipeline status. /// All new nodes and nodes with updated ports are pushed into stack. - bool expandPipeline(std::stack & stack, uint64_t pid); + UpdateNodeStatus expandPipeline(std::stack & stack, uint64_t pid); std::shared_ptr processors; std::vector source_processors; diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index 82cad471a29..72e1afaafaa 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -77,9 +77,13 @@ const Processors & PipelineExecutor::getProcessors() const return graph->getProcessors(); } -void PipelineExecutor::cancel() +void PipelineExecutor::cancel(ExecutionStatus reason) { - cancelled = true; + /// It is allowed to cancel not started query by user. + if (reason == ExecutionStatus::CancelledByUser) + tryUpdateExecutionStatus(ExecutionStatus::NotStarted, reason); + + tryUpdateExecutionStatus(ExecutionStatus::Executing, reason); finish(); graph->cancel(); } @@ -98,6 +102,11 @@ void PipelineExecutor::finish() tasks.finish(); } +bool PipelineExecutor::tryUpdateExecutionStatus(ExecutionStatus expected, ExecutionStatus desired) +{ + return execution_status.compare_exchange_strong(expected, desired); +} + void PipelineExecutor::execute(size_t num_threads, bool concurrency_control) { checkTimeLimit(); @@ -120,7 +129,7 @@ void PipelineExecutor::execute(size_t num_threads, bool concurrency_control) } catch (...) { - span.addAttribute(ExecutionStatus::fromCurrentException()); + span.addAttribute(DB::ExecutionStatus::fromCurrentException()); #ifndef NDEBUG LOG_TRACE(log, "Exception while executing query. Current state:\n{}", dumpPipeline()); @@ -169,7 +178,7 @@ bool PipelineExecutor::checkTimeLimitSoft() // We call cancel here so that all processors are notified and tasks waken up // so that the "break" is faster and doesn't wait for long events if (!continuing) - cancel(); + cancel(ExecutionStatus::CancelledByTimeout); return continuing; } @@ -195,7 +204,8 @@ void PipelineExecutor::finalizeExecution() { checkTimeLimit(); - if (cancelled) + auto status = execution_status.load(); + if (status == ExecutionStatus::CancelledByTimeout || status == ExecutionStatus::CancelledByUser) return; bool all_processors_finished = true; @@ -271,7 +281,7 @@ void PipelineExecutor::executeStepImpl(size_t thread_num, std::atomic_bool * yie break; if (!context.executeTask()) - cancel(); + cancel(ExecutionStatus::Exception); if (tasks.isFinished()) break; @@ -289,11 +299,13 @@ void PipelineExecutor::executeStepImpl(size_t thread_num, std::atomic_bool * yie Queue async_queue; /// Prepare processor after execution. - if (!graph->updateNode(context.getProcessorID(), queue, async_queue)) - cancel(); + auto status = graph->updateNode(context.getProcessorID(), queue, async_queue); + if (status == ExecutingGraph::UpdateNodeStatus::Exception) + cancel(ExecutionStatus::Exception); /// Push other tasks to global queue. - tasks.pushTasks(queue, async_queue, context); + if (status == ExecutingGraph::UpdateNodeStatus::Done) + tasks.pushTasks(queue, async_queue, context); } #ifndef NDEBUG @@ -309,7 +321,7 @@ void PipelineExecutor::executeStepImpl(size_t thread_num, std::atomic_bool * yie { /// spawnThreads can throw an exception, for example CANNOT_SCHEDULE_TASK. /// We should cancel execution properly before rethrow. - cancel(); + cancel(ExecutionStatus::Exception); throw; } @@ -328,6 +340,7 @@ void PipelineExecutor::executeStepImpl(size_t thread_num, std::atomic_bool * yie void PipelineExecutor::initializeExecution(size_t num_threads, bool concurrency_control) { is_execution_initialized = true; + tryUpdateExecutionStatus(ExecutionStatus::NotStarted, ExecutionStatus::Executing); size_t use_threads = num_threads; @@ -393,7 +406,7 @@ void PipelineExecutor::executeImpl(size_t num_threads, bool concurrency_control) { /// If finished_flag is not set, there was an exception. /// Cancel execution in this case. - cancel(); + cancel(ExecutionStatus::Exception); if (pool) pool->wait(); } @@ -432,7 +445,7 @@ String PipelineExecutor::dumpPipeline() const } } - std::vector statuses; + std::vector> statuses; std::vector proc_list; statuses.reserve(graph->nodes.size()); proc_list.reserve(graph->nodes.size()); diff --git a/src/Processors/Executors/PipelineExecutor.h b/src/Processors/Executors/PipelineExecutor.h index ae119355cb5..79d0a29d4e1 100644 --- a/src/Processors/Executors/PipelineExecutor.h +++ b/src/Processors/Executors/PipelineExecutor.h @@ -48,8 +48,20 @@ public: const Processors & getProcessors() const; + enum class ExecutionStatus + { + NotStarted, + Executing, + Finished, + Exception, + CancelledByUser, + CancelledByTimeout, + }; + /// Cancel execution. May be called from another thread. - void cancel(); + void cancel() { cancel(ExecutionStatus::CancelledByUser); } + + ExecutionStatus getExecutionStatus() const { return execution_status.load(); } /// Cancel processors which only read data from source. May be called from another thread. void cancelReading(); @@ -81,7 +93,7 @@ private: /// system.opentelemetry_span_log bool trace_processors = false; - std::atomic_bool cancelled = false; + std::atomic execution_status = ExecutionStatus::NotStarted; std::atomic_bool cancelled_reading = false; LoggerPtr log = getLogger("PipelineExecutor"); @@ -105,6 +117,10 @@ private: void executeStepImpl(size_t thread_num, std::atomic_bool * yield_flag = nullptr); void executeSingleThread(size_t thread_num); void finish(); + void cancel(ExecutionStatus reason); + + /// If execution_status == from, change it to desired. + bool tryUpdateExecutionStatus(ExecutionStatus expected, ExecutionStatus desired); String dumpPipeline() const; }; diff --git a/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp index 830a96533ed..866d224a08d 100644 --- a/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp @@ -15,6 +15,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int QUERY_WAS_CANCELLED; } class PushingAsyncSource : public ISource @@ -176,6 +177,16 @@ void PushingAsyncPipelineExecutor::start() data->thread = ThreadFromGlobalPool(std::move(func)); } +[[noreturn]] static void throwOnExecutionStatus(PipelineExecutor::ExecutionStatus status) +{ + if (status == PipelineExecutor::ExecutionStatus::CancelledByTimeout + || status == PipelineExecutor::ExecutionStatus::CancelledByUser) + throw Exception(ErrorCodes::QUERY_WAS_CANCELLED, "Query was cancelled"); + + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Pipeline for PushingPipelineExecutor was finished before all data was inserted"); +} + void PushingAsyncPipelineExecutor::push(Chunk chunk) { if (!started) @@ -185,8 +196,7 @@ void PushingAsyncPipelineExecutor::push(Chunk chunk) data->rethrowExceptionIfHas(); if (!is_pushed) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Pipeline for PushingAsyncPipelineExecutor was finished before all data was inserted"); + throwOnExecutionStatus(data->executor->getExecutionStatus()); } void PushingAsyncPipelineExecutor::push(Block block) diff --git a/src/Processors/Executors/PushingPipelineExecutor.cpp b/src/Processors/Executors/PushingPipelineExecutor.cpp index 696932932df..7a1c0111a3a 100644 --- a/src/Processors/Executors/PushingPipelineExecutor.cpp +++ b/src/Processors/Executors/PushingPipelineExecutor.cpp @@ -11,6 +11,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int QUERY_WAS_CANCELLED; } class PushingSource : public ISource @@ -80,6 +81,15 @@ const Block & PushingPipelineExecutor::getHeader() const return pushing_source->getPort().getHeader(); } +[[noreturn]] static void throwOnExecutionStatus(PipelineExecutor::ExecutionStatus status) +{ + if (status == PipelineExecutor::ExecutionStatus::CancelledByTimeout + || status == PipelineExecutor::ExecutionStatus::CancelledByUser) + throw Exception(ErrorCodes::QUERY_WAS_CANCELLED, "Query was cancelled"); + + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Pipeline for PushingPipelineExecutor was finished before all data was inserted"); +} void PushingPipelineExecutor::start() { @@ -91,8 +101,7 @@ void PushingPipelineExecutor::start() executor->setReadProgressCallback(pipeline.getReadProgressCallback()); if (!executor->executeStep(&input_wait_flag)) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Pipeline for PushingPipelineExecutor was finished before all data was inserted"); + throwOnExecutionStatus(executor->getExecutionStatus()); } void PushingPipelineExecutor::push(Chunk chunk) @@ -103,8 +112,7 @@ void PushingPipelineExecutor::push(Chunk chunk) pushing_source->setData(std::move(chunk)); if (!executor->executeStep(&input_wait_flag)) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Pipeline for PushingPipelineExecutor was finished before all data was inserted"); + throwOnExecutionStatus(executor->getExecutionStatus()); } void PushingPipelineExecutor::push(Block block) diff --git a/src/Processors/Formats/IOutputFormat.cpp b/src/Processors/Formats/IOutputFormat.cpp index 88a6fb1e92f..97628778adb 100644 --- a/src/Processors/Formats/IOutputFormat.cpp +++ b/src/Processors/Formats/IOutputFormat.cpp @@ -69,9 +69,10 @@ void IOutputFormat::work() if (finished && !finalized) { - if (rows_before_limit_counter && rows_before_limit_counter->hasAppliedLimit()) + if (rows_before_limit_counter && rows_before_limit_counter->hasAppliedStep()) setRowsBeforeLimit(rows_before_limit_counter->get()); - + if (rows_before_aggregation_counter && rows_before_aggregation_counter->hasAppliedStep()) + setRowsBeforeAggregation(rows_before_aggregation_counter->get()); finalize(); if (auto_flush) flush(); diff --git a/src/Processors/Formats/IOutputFormat.h b/src/Processors/Formats/IOutputFormat.h index cae2ab7691e..e9af4ca7cf5 100644 --- a/src/Processors/Formats/IOutputFormat.h +++ b/src/Processors/Formats/IOutputFormat.h @@ -1,9 +1,9 @@ #pragma once #include -#include -#include #include +#include +#include #include namespace DB @@ -36,14 +36,20 @@ public: void setAutoFlush() { auto_flush = true; } /// Value for rows_before_limit_at_least field. - virtual void setRowsBeforeLimit(size_t /*rows_before_limit*/) {} + virtual void setRowsBeforeLimit(size_t /*rows_before_limit*/) { } /// Counter to calculate rows_before_limit_at_least in processors pipeline. - void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) override { rows_before_limit_counter.swap(counter); } + void setRowsBeforeLimitCounter(RowsBeforeStepCounterPtr counter) override { rows_before_limit_counter.swap(counter); } + + /// Value for rows_before_aggregation field. + virtual void setRowsBeforeAggregation(size_t /*rows_before_aggregation*/) { } + + /// Counter to calculate rows_before_aggregation in processors pipeline. + void setRowsBeforeAggregationCounter(RowsBeforeStepCounterPtr counter) override { rows_before_aggregation_counter.swap(counter); } /// Notify about progress. Method could be called from different threads. /// Passed value are delta, that must be summarized. - virtual void onProgress(const Progress & /*progress*/) {} + virtual void onProgress(const Progress & /*progress*/) { } /// Content-Type to set when sending HTTP response. virtual std::string getContentType() const { return "text/plain; charset=UTF-8"; } @@ -151,6 +157,8 @@ protected: Progress progress; bool applied_limit = false; size_t rows_before_limit = 0; + bool applied_aggregation = false; + size_t rows_before_aggregation = 0; Chunk totals; Chunk extremes; }; @@ -184,7 +192,8 @@ protected: bool need_write_prefix = true; bool need_write_suffix = true; - RowsBeforeLimitCounterPtr rows_before_limit_counter; + RowsBeforeStepCounterPtr rows_before_limit_counter; + RowsBeforeStepCounterPtr rows_before_aggregation_counter; Statistics statistics; private: diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index ed91913de4d..77d5867c554 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -133,16 +133,31 @@ static ColumnWithTypeAndName readColumnWithStringData(const std::shared_ptr buffer = chunk.value_data(); const size_t chunk_length = chunk.length(); - for (size_t offset_i = 0; offset_i != chunk_length; ++offset_i) + const size_t null_count = chunk.null_count(); + if (null_count == 0) { - if (!chunk.IsNull(offset_i) && buffer) + for (size_t offset_i = 0; offset_i != chunk_length; ++offset_i) { const auto * raw_data = buffer->data() + chunk.value_offset(offset_i); column_chars_t.insert_assume_reserved(raw_data, raw_data + chunk.value_length(offset_i)); - } - column_chars_t.emplace_back('\0'); + column_chars_t.emplace_back('\0'); - column_offsets.emplace_back(column_chars_t.size()); + column_offsets.emplace_back(column_chars_t.size()); + } + } + else + { + for (size_t offset_i = 0; offset_i != chunk_length; ++offset_i) + { + if (!chunk.IsNull(offset_i) && buffer) + { + const auto * raw_data = buffer->data() + chunk.value_offset(offset_i); + column_chars_t.insert_assume_reserved(raw_data, raw_data + chunk.value_length(offset_i)); + } + column_chars_t.emplace_back('\0'); + + column_offsets.emplace_back(column_chars_t.size()); + } } } return {std::move(internal_column), std::move(internal_type), column_name}; @@ -743,6 +758,15 @@ static ColumnWithTypeAndName readNonNullableColumnFromArrowColumn( case TypeIndex::IPv6: return readIPv6ColumnFromBinaryData(arrow_column, column_name); /// ORC format outputs big integers as binary column, because there is no fixed binary in ORC. + /// + /// When ORC/Parquet file says the type is "byte array" or "fixed len byte array", + /// but the clickhouse query says to interpret the column as e.g. Int128, it + /// may mean one of two things: + /// * The byte array is the 16 bytes of Int128, little-endian. + /// * The byte array is an ASCII string containing the Int128 formatted in base 10. + /// There's no reliable way to distinguish these cases. We just guess: if the + /// byte array is variable-length, and the length is different from sizeof(type), + /// we parse as text, otherwise as binary. case TypeIndex::Int128: return readColumnWithBigNumberFromBinaryData(arrow_column, column_name, type_hint); case TypeIndex::UInt128: diff --git a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp index 58bf4c1a2fc..30301b242db 100644 --- a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp +++ b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp @@ -185,7 +185,7 @@ namespace DB } else { - auto value = static_cast(column[value_i].get>().getValue()); + auto value = static_cast(column[value_i].safeGet>().getValue()); if (need_rescale) { if (common::mulOverflow(value, rescale_multiplier, value)) diff --git a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp index 06e8668cd7c..566a036d79c 100644 --- a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp +++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp @@ -208,20 +208,20 @@ private: /// Do not replace empty array and array of NULLs if (literal->value.getType() == Field::Types::Array) { - const Array & array = literal->value.get(); + const Array & array = literal->value.safeGet(); auto not_null = std::find_if_not(array.begin(), array.end(), [](const auto & elem) { return elem.isNull(); }); if (not_null == array.end()) return true; } else if (literal->value.getType() == Field::Types::Map) { - const Map & map = literal->value.get(); + const Map & map = literal->value.safeGet(); if (map.size() % 2) return false; } else if (literal->value.getType() == Field::Types::Tuple) { - const Tuple & tuple = literal->value.get(); + const Tuple & tuple = literal->value.safeGet(); for (const auto & value : tuple) if (value.isNull()) diff --git a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp index 9da5e533324..a72c6037619 100644 --- a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp @@ -168,10 +168,11 @@ JSONAsObjectRowInputFormat::JSONAsObjectRowInputFormat( const Block & header_, ReadBuffer & in_, Params params_, const FormatSettings & format_settings_) : JSONAsRowInputFormat(header_, in_, params_, format_settings_) { - if (!isObject(header_.getByPosition(0).type)) + const auto & type = header_.getByPosition(0).type; + if (!isObject(type) && !isObjectDeprecated(type)) throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Input format JSONAsObject is only suitable for tables with a single column of type Object but the column type is {}", - header_.getByPosition(0).type->getName()); + "Input format JSONAsObject is only suitable for tables with a single column of type Object/JSON but the column type is {}", + type->getName()); } void JSONAsObjectRowInputFormat::readJSONObject(IColumn & column) @@ -186,13 +187,13 @@ Chunk JSONAsObjectRowInputFormat::getChunkForCount(size_t rows) return Chunk({std::move(column)}, rows); } -JSONAsObjectExternalSchemaReader::JSONAsObjectExternalSchemaReader(const FormatSettings & settings) +JSONAsObjectExternalSchemaReader::JSONAsObjectExternalSchemaReader(const FormatSettings & settings_) : settings(settings_) { - if (!settings.json.allow_object_type) + if (!settings.json.allow_deprecated_object_type && !settings.json.allow_json_type) throw Exception( ErrorCodes::ILLEGAL_COLUMN, - "Cannot infer the data structure in JSONAsObject format because experimental Object type is not allowed. Set setting " - "allow_experimental_object_type = 1 in order to allow it"); + "Cannot infer the data structure in JSONAsObject format because experimental Object/JSON type is not allowed. Set setting " + "allow_experimental_object_type = 1 or allow_experimental_json_type=1 in order to allow it"); } void registerInputFormatJSONAsString(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.h b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.h index 5eaa88182b7..f33108472de 100644 --- a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.h +++ b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.h @@ -5,6 +5,7 @@ #include #include #include +#include #include namespace DB @@ -70,12 +71,17 @@ public: class JSONAsObjectExternalSchemaReader : public IExternalSchemaReader { public: - explicit JSONAsObjectExternalSchemaReader(const FormatSettings & settings); + explicit JSONAsObjectExternalSchemaReader(const FormatSettings & settings_); NamesAndTypesList readSchema() override { - return {{"json", std::make_shared("json", false)}}; + if (settings.json.allow_json_type) + return {{"json", std::make_shared(DataTypeObject::SchemaFormat::JSON)}}; + return {{"json", std::make_shared("json", false)}}; } + +private: + FormatSettings settings; }; } diff --git a/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.cpp b/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.cpp index 1e8f57aa9a6..2f285e3d202 100644 --- a/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.cpp @@ -81,6 +81,8 @@ void JSONColumnsWithMetadataBlockOutputFormat::finalizeImpl() rows, statistics.rows_before_limit, statistics.applied_limit, + statistics.rows_before_aggregation, + statistics.applied_aggregation, statistics.watch, statistics.progress, format_settings.write_statistics, diff --git a/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.h b/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.h index c72b4d87234..e5208440483 100644 --- a/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.h @@ -44,6 +44,11 @@ public: String getName() const override { return "JSONCompactColumnsBlockOutputFormat"; } void setRowsBeforeLimit(size_t rows_before_limit_) override { statistics.rows_before_limit = rows_before_limit_; statistics.applied_limit = true; } + void setRowsBeforeAggregation(size_t rows_before_aggregation_) override + { + statistics.rows_before_aggregation = rows_before_aggregation_; + statistics.applied_aggregation = true; + } void onProgress(const Progress & progress_) override { statistics.progress.incrementPiecewiseAtomically(progress_); } protected: diff --git a/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp index 20182d84917..fec24b10c11 100644 --- a/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp @@ -116,6 +116,8 @@ void JSONRowOutputFormat::finalizeImpl() row_count, statistics.rows_before_limit, statistics.applied_limit, + statistics.rows_before_aggregation, + statistics.applied_aggregation, statistics.watch, statistics.progress, settings.write_statistics && exception_message.empty(), diff --git a/src/Processors/Formats/Impl/JSONRowOutputFormat.h b/src/Processors/Formats/Impl/JSONRowOutputFormat.h index a38cd0e8db9..c36adb5ee3e 100644 --- a/src/Processors/Formats/Impl/JSONRowOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONRowOutputFormat.h @@ -35,6 +35,11 @@ public: statistics.applied_limit = true; statistics.rows_before_limit = rows_before_limit_; } + void setRowsBeforeAggregation(size_t rows_before_aggregation_) override + { + statistics.applied_aggregation = true; + statistics.rows_before_aggregation = rows_before_aggregation_; + } protected: void writeField(const IColumn & column, const ISerialization & serialization, size_t row_num) override; diff --git a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp index 649721f28bf..e68286bfcc5 100644 --- a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp @@ -262,7 +262,7 @@ convertFieldToORCLiteral(const orc::Type & orc_type, const Field & field, DataTy { case orc::BOOLEAN: { /// May throw exception - auto val = field.get(); + auto val = field.safeGet(); return orc::Literal(val != 0); } case orc::BYTE: @@ -275,7 +275,7 @@ convertFieldToORCLiteral(const orc::Type & orc_type, const Field & field, DataTy /// SELECT * FROM file('t.orc', ORC, 'x UInt8') WHERE x > 10 /// We have to reject this, otherwise it would miss values > 127 (because /// they're treated as negative by ORC). - auto val = field.get(); + auto val = field.safeGet(); return orc::Literal(val); } case orc::FLOAT: @@ -1143,24 +1143,42 @@ readColumnWithStringData(const orc::ColumnVectorBatch * orc_column, const orc::T reserver_size += 1; } - column_chars_t.reserve(reserver_size); - column_offsets.reserve(orc_str_column->numElements); + column_chars_t.resize_exact(reserver_size); + column_offsets.resize_exact(orc_str_column->numElements); size_t curr_offset = 0; - for (size_t i = 0; i < orc_str_column->numElements; ++i) + if (!orc_str_column->hasNulls) { - if (!orc_str_column->hasNulls || orc_str_column->notNull[i]) + for (size_t i = 0; i < orc_str_column->numElements; ++i) { const auto * buf = orc_str_column->data[i]; size_t buf_size = orc_str_column->length[i]; - column_chars_t.insert_assume_reserved(buf, buf + buf_size); + memcpy(&column_chars_t[curr_offset], buf, buf_size); curr_offset += buf_size; + + column_chars_t[curr_offset] = 0; + ++curr_offset; + + column_offsets[i] = curr_offset; } + } + else + { + for (size_t i = 0; i < orc_str_column->numElements; ++i) + { + if (orc_str_column->notNull[i]) + { + const auto * buf = orc_str_column->data[i]; + size_t buf_size = orc_str_column->length[i]; + memcpy(&column_chars_t[curr_offset], buf, buf_size); + curr_offset += buf_size; + } - column_chars_t.push_back(0); - ++curr_offset; + column_chars_t[curr_offset] = 0; + ++curr_offset; - column_offsets.push_back(curr_offset); + column_offsets[i] = curr_offset; + } } return {std::move(internal_column), std::move(internal_type), column_name}; } diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp index 6f543a05fba..4a7a23158ff 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -203,25 +204,15 @@ template void ORCBlockOutputFormat::writeNumbers( orc::ColumnVectorBatch & orc_column, const IColumn & column, - const PaddedPODArray * null_bytemap, + const PaddedPODArray * /*null_bytemap*/, ConvertFunc convert) { NumberVectorBatch & number_orc_column = dynamic_cast(orc_column); const auto & number_column = assert_cast &>(column); - number_orc_column.resize(number_column.size()); + number_orc_column.data.resize(number_column.size()); for (size_t i = 0; i != number_column.size(); ++i) - { - if (null_bytemap && (*null_bytemap)[i]) - { - number_orc_column.notNull[i] = 0; - continue; - } - - number_orc_column.notNull[i] = 1; number_orc_column.data[i] = convert(number_column.getElement(i)); - } - number_orc_column.numElements = number_column.size(); } template @@ -229,7 +220,7 @@ void ORCBlockOutputFormat::writeDecimals( orc::ColumnVectorBatch & orc_column, const IColumn & column, DataTypePtr & type, - const PaddedPODArray * null_bytemap, + const PaddedPODArray * /*null_bytemap*/, ConvertFunc convert) { DecimalVectorBatch & decimal_orc_column = dynamic_cast(orc_column); @@ -238,71 +229,49 @@ void ORCBlockOutputFormat::writeDecimals( decimal_orc_column.precision = decimal_type->getPrecision(); decimal_orc_column.scale = decimal_type->getScale(); decimal_orc_column.resize(decimal_column.size()); - for (size_t i = 0; i != decimal_column.size(); ++i) - { - if (null_bytemap && (*null_bytemap)[i]) - { - decimal_orc_column.notNull[i] = 0; - continue; - } - decimal_orc_column.notNull[i] = 1; + decimal_orc_column.values.resize(decimal_column.size()); + for (size_t i = 0; i != decimal_column.size(); ++i) decimal_orc_column.values[i] = convert(decimal_column.getElement(i).value); - } - decimal_orc_column.numElements = decimal_column.size(); } template void ORCBlockOutputFormat::writeStrings( orc::ColumnVectorBatch & orc_column, const IColumn & column, - const PaddedPODArray * null_bytemap) + const PaddedPODArray * /*null_bytemap*/) { orc::StringVectorBatch & string_orc_column = dynamic_cast(orc_column); const auto & string_column = assert_cast(column); - string_orc_column.resize(string_column.size()); + string_orc_column.data.resize(string_column.size()); + string_orc_column.length.resize(string_column.size()); for (size_t i = 0; i != string_column.size(); ++i) { - if (null_bytemap && (*null_bytemap)[i]) - { - string_orc_column.notNull[i] = 0; - continue; - } - - string_orc_column.notNull[i] = 1; const std::string_view & string = string_column.getDataAt(i).toView(); string_orc_column.data[i] = const_cast(string.data()); string_orc_column.length[i] = string.size(); } - string_orc_column.numElements = string_column.size(); } template void ORCBlockOutputFormat::writeDateTimes( orc::ColumnVectorBatch & orc_column, const IColumn & column, - const PaddedPODArray * null_bytemap, + const PaddedPODArray * /*null_bytemap*/, GetSecondsFunc get_seconds, GetNanosecondsFunc get_nanoseconds) { orc::TimestampVectorBatch & timestamp_orc_column = dynamic_cast(orc_column); const auto & timestamp_column = assert_cast(column); - timestamp_orc_column.resize(timestamp_column.size()); + timestamp_orc_column.data.resize(timestamp_column.size()); + timestamp_orc_column.nanoseconds.resize(timestamp_column.size()); for (size_t i = 0; i != timestamp_column.size(); ++i) { - if (null_bytemap && (*null_bytemap)[i]) - { - timestamp_orc_column.notNull[i] = 0; - continue; - } - - timestamp_orc_column.notNull[i] = 1; timestamp_orc_column.data[i] = static_cast(get_seconds(timestamp_column.getElement(i))); timestamp_orc_column.nanoseconds[i] = static_cast(get_nanoseconds(timestamp_column.getElement(i))); } - timestamp_orc_column.numElements = timestamp_column.size(); } void ORCBlockOutputFormat::writeColumn( @@ -311,9 +280,27 @@ void ORCBlockOutputFormat::writeColumn( DataTypePtr & type, const PaddedPODArray * null_bytemap) { - orc_column.notNull.resize(column.size()); + size_t rows = column.size(); + orc_column.resize(rows); + orc_column.numElements = rows; + + /// Calculate orc_column.hasNulls if (null_bytemap) - orc_column.hasNulls = true; + orc_column.hasNulls = !memoryIsZero(null_bytemap->data(), 0, null_bytemap->size()); + else + orc_column.hasNulls = false; + + /// Fill orc_column.notNull + if (orc_column.hasNulls) + { + for (size_t i = 0; i < rows; ++i) + orc_column.notNull[i] = !(*null_bytemap)[i]; + } + else + { + for (size_t i = 0; i < rows; ++i) + orc_column.notNull[i] = 1; + } /// ORC doesn't have unsigned types, so cast everything to signed and sign-extend to Int64 to /// make the ORC library calculate min and max correctly. @@ -471,6 +458,7 @@ void ORCBlockOutputFormat::writeColumn( } case TypeIndex::Nullable: { + chassert(!null_bytemap); const auto & nullable_column = assert_cast(column); const PaddedPODArray & new_null_bytemap = assert_cast &>(*nullable_column.getNullMapColumnPtr()).getData(); auto nested_type = removeNullable(type); @@ -485,19 +473,15 @@ void ORCBlockOutputFormat::writeColumn( const ColumnArray::Offsets & offsets = list_column.getOffsets(); size_t column_size = list_column.size(); - list_orc_column.resize(column_size); + list_orc_column.offsets.resize(column_size + 1); /// The length of list i in ListVectorBatch is offsets[i+1] - offsets[i]. list_orc_column.offsets[0] = 0; for (size_t i = 0; i != column_size; ++i) - { list_orc_column.offsets[i + 1] = offsets[i]; - list_orc_column.notNull[i] = 1; - } orc::ColumnVectorBatch & nested_orc_column = *list_orc_column.elements; - writeColumn(nested_orc_column, list_column.getData(), nested_type, null_bytemap); - list_orc_column.numElements = column_size; + writeColumn(nested_orc_column, list_column.getData(), nested_type, nullptr); break; } case TypeIndex::Tuple: @@ -505,10 +489,8 @@ void ORCBlockOutputFormat::writeColumn( orc::StructVectorBatch & struct_orc_column = dynamic_cast(orc_column); const auto & tuple_column = assert_cast(column); auto nested_types = assert_cast(type.get())->getElements(); - for (size_t i = 0; i != tuple_column.size(); ++i) - struct_orc_column.notNull[i] = 1; for (size_t i = 0; i != tuple_column.tupleSize(); ++i) - writeColumn(*struct_orc_column.fields[i], tuple_column.getColumn(i), nested_types[i], null_bytemap); + writeColumn(*struct_orc_column.fields[i], tuple_column.getColumn(i), nested_types[i], nullptr); break; } case TypeIndex::Map: @@ -520,25 +502,21 @@ void ORCBlockOutputFormat::writeColumn( size_t column_size = list_column.size(); - map_orc_column.resize(list_column.size()); + map_orc_column.offsets.resize(column_size + 1); /// The length of list i in ListVectorBatch is offsets[i+1] - offsets[i]. map_orc_column.offsets[0] = 0; for (size_t i = 0; i != column_size; ++i) - { map_orc_column.offsets[i + 1] = offsets[i]; - map_orc_column.notNull[i] = 1; - } + const auto nested_columns = assert_cast(list_column.getDataPtr().get())->getColumns(); orc::ColumnVectorBatch & keys_orc_column = *map_orc_column.keys; auto key_type = map_type.getKeyType(); - writeColumn(keys_orc_column, *nested_columns[0], key_type, null_bytemap); + writeColumn(keys_orc_column, *nested_columns[0], key_type, nullptr); orc::ColumnVectorBatch & values_orc_column = *map_orc_column.elements; auto value_type = map_type.getValueType(); - writeColumn(values_orc_column, *nested_columns[1], value_type, null_bytemap); - - map_orc_column.numElements = column_size; + writeColumn(values_orc_column, *nested_columns[1], value_type, nullptr); break; } default: @@ -546,27 +524,6 @@ void ORCBlockOutputFormat::writeColumn( } } -size_t ORCBlockOutputFormat::getColumnSize(const IColumn & column, DataTypePtr & type) -{ - if (type->getTypeId() == TypeIndex::Array) - { - auto nested_type = assert_cast(*type).getNestedType(); - const IColumn & nested_column = assert_cast(column).getData(); - return std::max(column.size(), getColumnSize(nested_column, nested_type)); - } - - return column.size(); -} - -size_t ORCBlockOutputFormat::getMaxColumnSize(Chunk & chunk) -{ - size_t columns_num = chunk.getNumColumns(); - size_t max_column_size = 0; - for (size_t i = 0; i != columns_num; ++i) - max_column_size = std::max(max_column_size, getColumnSize(*chunk.getColumns()[i], data_types[i])); - return max_column_size; -} - void ORCBlockOutputFormat::consume(Chunk chunk) { if (!writer) @@ -575,10 +532,7 @@ void ORCBlockOutputFormat::consume(Chunk chunk) size_t columns_num = chunk.getNumColumns(); size_t rows_num = chunk.getNumRows(); - /// getMaxColumnSize is needed to write arrays. - /// The size of the batch must be no less than total amount of array elements - /// and no less than the number of rows (ORC writes a null bit for every row). - std::unique_ptr batch = writer->createRowBatch(getMaxColumnSize(chunk)); + std::unique_ptr batch = writer->createRowBatch(chunk.getNumRows()); orc::StructVectorBatch & root = dynamic_cast(*batch); auto columns = chunk.detachColumns(); diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.h b/src/Processors/Formats/Impl/ORCBlockOutputFormat.h index 28837193d1a..06ecac9b820 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.h @@ -69,11 +69,6 @@ private: void writeColumn(orc::ColumnVectorBatch & orc_column, const IColumn & column, DataTypePtr & type, const PaddedPODArray * null_bytemap); - /// These two functions are needed to know maximum nested size of arrays to - /// create an ORC Batch with the appropriate size - size_t getColumnSize(const IColumn & column, DataTypePtr & type); - size_t getMaxColumnSize(Chunk & chunk); - void prepareWriter(); const FormatSettings format_settings; diff --git a/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.h b/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.h index 54617c77f86..02c74742226 100644 --- a/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.h +++ b/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.h @@ -313,6 +313,12 @@ private: statistics.rows_before_limit = rows_before_limit; statistics.applied_limit = true; } + void setRowsBeforeAggregation(size_t rows_before_aggregation) override + { + std::lock_guard lock(statistics_mutex); + statistics.rows_before_aggregation = rows_before_aggregation; + statistics.applied_aggregation = true; + } }; } diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index bc5e8292192..1f213fef731 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -25,6 +25,7 @@ #include #include #include +#include namespace CurrentMetrics { @@ -54,7 +55,7 @@ namespace ErrorCodes } \ } while (false) -/// Decode min/max value from column chunk statistics. +/// Decode min/max value from column chunk statistics. Returns Null if missing or unsupported. /// /// There are two questionable decisions in this implementation: /// * We parse the value from the encoded byte string instead of casting the parquet::Statistics @@ -62,7 +63,7 @@ namespace ErrorCodes /// * We dispatch based on the parquet logical+converted+physical type instead of the ClickHouse type. /// The idea is that this is similar to what we'll have to do when reimplementing Parquet parsing in /// ClickHouse instead of using Arrow (for speed). So, this is an exercise in parsing Parquet manually. -static std::optional decodePlainParquetValueSlow(const std::string & data, parquet::Type::type physical_type, const parquet::ColumnDescriptor & descr) +static Field decodePlainParquetValueSlow(const std::string & data, parquet::Type::type physical_type, const parquet::ColumnDescriptor & descr, TypeIndex type_hint) { using namespace parquet; @@ -118,8 +119,6 @@ static std::optional decodePlainParquetValueSlow(const std::string & data if (data.size() != size || size < 1 || size > 32) throw Exception(ErrorCodes::CANNOT_PARSE_NUMBER, "Unexpected decimal size: {} (actual {})", size, data.size()); - /// For simplicity, widen all decimals to 256-bit. It should compare correctly with values - /// of different bitness. Int256 val = 0; memcpy(&val, data.data(), size); if (big_endian) @@ -128,7 +127,19 @@ static std::optional decodePlainParquetValueSlow(const std::string & data if (size < 32 && (val >> (size * 8 - 1)) != 0) val |= ~((Int256(1) << (size * 8)) - 1); - return Field(DecimalField(Decimal256(val), static_cast(scale))); + auto narrow = [&](auto x) -> Field + { + memcpy(&x, &val, sizeof(x)); + return Field(DecimalField(x, static_cast(scale))); + }; + if (size <= 4) + return narrow(Decimal32(0)); + else if (size <= 8) + return narrow(Decimal64(0)); + else if (size <= 16) + return narrow(Decimal128(0)); + else + return narrow(Decimal256(0)); } while (false); @@ -185,8 +196,6 @@ static std::optional decodePlainParquetValueSlow(const std::string & data return Field(val); } - /// Strings. - if (physical_type == Type::type::BYTE_ARRAY || physical_type == Type::type::FIXED_LEN_BYTE_ARRAY) { /// Arrow's parquet decoder handles missing min/max values slightly incorrectly. @@ -213,14 +222,31 @@ static std::optional decodePlainParquetValueSlow(const std::string & data /// TODO: Remove this workaround either when we implement our own Parquet decoder that /// doesn't have this bug, or if it's fixed in Arrow. if (data.empty()) - return std::nullopt; + return Field(); + /// Long integers, encoded either as text or as little-endian bytes. + /// The parquet file doesn't know that it's numbers, so the min/max are produced by comparing + /// strings lexicographically. So these min and max are mostly useless to us. + /// There's one case where they're not useless: min == max; currently we don't make use of this. + switch (type_hint) + { + case TypeIndex::UInt128: + case TypeIndex::UInt256: + case TypeIndex::Int128: + case TypeIndex::Int256: + case TypeIndex::IPv6: + return Field(); + default: break; + } + + /// Strings. return Field(data); } - /// This one's deprecated in Parquet. + /// This type is deprecated in Parquet. + /// TODO: But turns out it's still used in practice, we should support it. if (physical_type == Type::type::INT96) - throw Exception(ErrorCodes::CANNOT_PARSE_NUMBER, "Parquet INT96 type is deprecated and not supported"); + return Field(); /// Integers. @@ -260,6 +286,9 @@ static std::vector getHyperrectangleForRowGroup(const parquet::FileMetaDa if (!s) continue; + if (s->descr()->schema_node()->is_repeated()) + continue; + auto path = c->path_in_schema()->ToDotVector(); if (path.size() != 1) continue; // compound types not supported @@ -283,15 +312,13 @@ static std::vector getHyperrectangleForRowGroup(const parquet::FileMetaDa continue; auto stats = it->second; - auto default_value = [&]() -> Field - { - DataTypePtr type = header.getByPosition(idx).type; - if (type->lowCardinality()) - type = assert_cast(*type).getDictionaryType(); - if (type->isNullable()) - type = assert_cast(*type).getNestedType(); - return type->getDefault(); - }; + DataTypePtr type = header.getByPosition(idx).type; + if (type->lowCardinality()) + type = assert_cast(*type).getDictionaryType(); + if (type->isNullable()) + type = assert_cast(*type).getNestedType(); + Field default_value = type->getDefault(); + TypeIndex type_index = type->getTypeId(); /// Only primitive fields are supported, not arrays, maps, tuples, or Nested. /// Arrays, maps, and Nested can't be meaningfully supported because Parquet only has min/max @@ -299,14 +326,47 @@ static std::vector getHyperrectangleForRowGroup(const parquet::FileMetaDa /// Same limitation for tuples, but maybe it would make sense to have some kind of tuple /// expansion in KeyCondition to accept ranges per element instead of whole tuple. - std::optional min; - std::optional max; + Field min; + Field max; if (stats->HasMinMax()) { try { - min = decodePlainParquetValueSlow(stats->EncodeMin(), stats->physical_type(), *stats->descr()); - max = decodePlainParquetValueSlow(stats->EncodeMax(), stats->physical_type(), *stats->descr()); + min = decodePlainParquetValueSlow(stats->EncodeMin(), stats->physical_type(), *stats->descr(), type_index); + max = decodePlainParquetValueSlow(stats->EncodeMax(), stats->physical_type(), *stats->descr(), type_index); + + /// If the data type in parquet file substantially differs from the requested data type, + /// it's sometimes correct to just typecast the min/max values. + /// Other times it's incorrect, e.g.: + /// INSERT INTO FUNCTION file('t.parquet', Parquet, 'x String') VALUES ('1'), ('100'), ('2'); + /// SELECT * FROM file('t.parquet', Parquet, 'x Int64') WHERE x >= 3; + /// If we just typecast min/max from string to integer, this query will incorrectly return empty result. + /// Allow conversion in some simple cases, otherwise ignore the min/max values. + auto min_type = min.getType(); + auto max_type = max.getType(); + min = convertFieldToType(min, *type); + max = convertFieldToType(max, *type); + auto ok_cast = [&](Field::Types::Which from, Field::Types::Which to) -> bool + { + if (from == to) + return true; + /// Decimal -> wider decimal. + if (Field::isDecimal(from) || Field::isDecimal(to)) + return Field::isDecimal(from) && Field::isDecimal(to) && to >= from; + /// Integer -> IP. + if (to == Field::Types::IPv4) + return from == Field::Types::UInt64; + /// Disable index for everything else, especially string <-> number. + return false; + }; + if (!(ok_cast(min_type, min.getType()) && ok_cast(max_type, max.getType())) && + !(min == max) && + !(min_type == Field::Types::Int64 && min.getType() == Field::Types::UInt64 && min.safeGet() >= 0) && + !(max_type == Field::Types::UInt64 && max.getType() == Field::Types::Int64 && max.safeGet() <= UInt64(INT64_MAX))) + { + min = Field(); + max = Field(); + } } catch (Exception & e) { @@ -328,7 +388,7 @@ static std::vector getHyperrectangleForRowGroup(const parquet::FileMetaDa { /// Single-point range containing either the default value of one of the infinities. if (null_as_default) - hyperrectangle[idx].right = hyperrectangle[idx].left = default_value(); + hyperrectangle[idx].right = hyperrectangle[idx].left = default_value; else hyperrectangle[idx].right = hyperrectangle[idx].left; continue; @@ -339,32 +399,31 @@ static std::vector getHyperrectangleForRowGroup(const parquet::FileMetaDa if (null_as_default) { /// Make sure the range contains the default value. - Field def = default_value(); - if (min.has_value() && applyVisitor(FieldVisitorAccurateLess(), def, *min)) - min = def; - if (max.has_value() && applyVisitor(FieldVisitorAccurateLess(), *max, def)) - max = def; + if (!min.isNull() && applyVisitor(FieldVisitorAccurateLess(), default_value, min)) + min = default_value; + if (!max.isNull() && applyVisitor(FieldVisitorAccurateLess(), max, default_value)) + max = default_value; } else { /// Make sure the range reaches infinity on at least one side. - if (min.has_value() && max.has_value()) - min.reset(); + if (!min.isNull() && !max.isNull()) + min = Field(); } } else { /// If the column doesn't have nulls, exclude both infinities. - if (!min.has_value()) + if (min.isNull()) hyperrectangle[idx].left_included = false; - if (!max.has_value()) + if (max.isNull()) hyperrectangle[idx].right_included = false; } - if (min.has_value()) - hyperrectangle[idx].left = std::move(min.value()); - if (max.has_value()) - hyperrectangle[idx].right = std::move(max.value()); + if (!min.isNull()) + hyperrectangle[idx].left = std::move(min); + if (!max.isNull()) + hyperrectangle[idx].right = std::move(max); } return hyperrectangle; diff --git a/src/Processors/Formats/Impl/PrometheusTextOutputFormat.cpp b/src/Processors/Formats/Impl/PrometheusTextOutputFormat.cpp index 3578401a0f8..b43c195f201 100644 --- a/src/Processors/Formats/Impl/PrometheusTextOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrometheusTextOutputFormat.cpp @@ -286,10 +286,10 @@ static void columnMapToContainer(const ColumnMap * col_map, size_t row_num, Cont { Field field; col_map->get(row_num, field); - const auto & map_field = field.get(); + const auto & map_field = field.safeGet(); for (const auto & map_element : map_field) { - const auto & map_entry = map_element.get(); + const auto & map_entry = map_element.safeGet(); String entry_key; String entry_value; diff --git a/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp b/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp index 1c43a0fa331..5d6db17aaa2 100644 --- a/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp @@ -42,9 +42,11 @@ TemplateBlockOutputFormat::TemplateBlockOutputFormat(const Block & header_, Writ case static_cast(ResultsetPart::TimeElapsed): case static_cast(ResultsetPart::RowsRead): case static_cast(ResultsetPart::BytesRead): + case static_cast(ResultsetPart::RowsBeforeAggregation): if (format.escaping_rules[i] == EscapingRule::None) - format.throwInvalidFormat("Serialization type for output part rows, rows_before_limit, time, " - "rows_read or bytes_read is not specified", i); + format.throwInvalidFormat( + "Serialization type for output part rows, rows, time, " + "rows_read or bytes_read is not specified", i); break; default: format.throwInvalidFormat("Invalid output part", i); @@ -88,6 +90,8 @@ TemplateBlockOutputFormat::ResultsetPart TemplateBlockOutputFormat::stringToResu return ResultsetPart::RowsRead; else if (part == "bytes_read") return ResultsetPart::BytesRead; + else if (part == "rows_before_aggregation") + return ResultsetPart::RowsBeforeAggregation; else throw Exception(ErrorCodes::SYNTAX_ERROR, "Unknown output part {}", part); } @@ -173,6 +177,11 @@ void TemplateBlockOutputFormat::finalizeImpl() case ResultsetPart::BytesRead: writeValue(statistics.progress.read_bytes.load(), format.escaping_rules[i]); break; + case ResultsetPart::RowsBeforeAggregation: + if (!statistics.applied_aggregation) + format.throwInvalidFormat("Cannot print rows_before_aggregation for this request", i); + writeValue(statistics.rows_before_aggregation, format.escaping_rules[i]); + break; default: break; } diff --git a/src/Processors/Formats/Impl/TemplateBlockOutputFormat.h b/src/Processors/Formats/Impl/TemplateBlockOutputFormat.h index 53d98849482..5e88d79b4a8 100644 --- a/src/Processors/Formats/Impl/TemplateBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/TemplateBlockOutputFormat.h @@ -21,6 +21,11 @@ public: String getName() const override { return "TemplateBlockOutputFormat"; } void setRowsBeforeLimit(size_t rows_before_limit_) override { statistics.rows_before_limit = rows_before_limit_; statistics.applied_limit = true; } + void setRowsBeforeAggregation(size_t rows_before_aggregation_) override + { + statistics.rows_before_aggregation = rows_before_aggregation_; + statistics.applied_aggregation = true; + } void onProgress(const Progress & progress_) override { statistics.progress.incrementPiecewiseAtomically(progress_); } enum class ResultsetPart : size_t @@ -33,7 +38,8 @@ public: RowsBeforeLimit, TimeElapsed, RowsRead, - BytesRead + BytesRead, + RowsBeforeAggregation }; static ResultsetPart stringToResultsetPart(const String & part); diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp index de34a8aa04f..9839f64b947 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp @@ -333,7 +333,7 @@ namespace { const DataTypeTuple & type_tuple = static_cast(data_type); - Tuple & tuple_value = value.get(); + Tuple & tuple_value = value.safeGet(); size_t src_tuple_size = tuple_value.size(); size_t dst_tuple_size = type_tuple.getElements().size(); @@ -360,7 +360,7 @@ namespace if (element_type.isNullable()) return; - Array & array_value = value.get(); + Array & array_value = value.safeGet(); size_t array_value_size = array_value.size(); for (size_t i = 0; i < array_value_size; ++i) @@ -378,12 +378,12 @@ namespace const auto & key_type = *type_map.getKeyType(); const auto & value_type = *type_map.getValueType(); - auto & map = value.get(); + auto & map = value.safeGet(); size_t map_size = map.size(); for (size_t i = 0; i < map_size; ++i) { - auto & map_entry = map[i].get(); + auto & map_entry = map[i].safeGet(); auto & entry_key = map_entry[0]; auto & entry_value = map_entry[1]; diff --git a/src/Processors/Formats/Impl/XMLRowOutputFormat.cpp b/src/Processors/Formats/Impl/XMLRowOutputFormat.cpp index 52c161c3208..b19fcfd4a4a 100644 --- a/src/Processors/Formats/Impl/XMLRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/XMLRowOutputFormat.cpp @@ -191,6 +191,7 @@ void XMLRowOutputFormat::finalizeImpl() writeRowsBeforeLimitAtLeast(); + writeRowsBeforeAggregationAtLeast(); if (!exception_message.empty()) writeException(); @@ -219,6 +220,16 @@ void XMLRowOutputFormat::writeRowsBeforeLimitAtLeast() } } +void XMLRowOutputFormat::writeRowsBeforeAggregationAtLeast() +{ + if (statistics.applied_aggregation) + { + writeCString("\t", *ostr); + writeIntText(statistics.rows_before_aggregation, *ostr); + writeCString("\n", *ostr); + } +} + void XMLRowOutputFormat::writeStatistics() { writeCString("\t\n", *ostr); diff --git a/src/Processors/Formats/Impl/XMLRowOutputFormat.h b/src/Processors/Formats/Impl/XMLRowOutputFormat.h index daf03539d0b..792acd118c8 100644 --- a/src/Processors/Formats/Impl/XMLRowOutputFormat.h +++ b/src/Processors/Formats/Impl/XMLRowOutputFormat.h @@ -48,6 +48,11 @@ private: statistics.rows_before_limit = rows_before_limit_; } + void setRowsBeforeAggregation(size_t rows_before_aggregation_) override + { + statistics.applied_aggregation = true; + statistics.rows_before_aggregation = rows_before_aggregation_; + } void onRowsReadBeforeUpdate() override { row_count = getRowsReadBefore(); } void onProgress(const Progress & value) override; @@ -56,6 +61,7 @@ private: void writeExtremesElement(const char * title, const Columns & columns, size_t row_num); void writeRowsBeforeLimitAtLeast(); + void writeRowsBeforeAggregationAtLeast(); void writeStatistics(); void writeException(); diff --git a/src/Processors/Formats/LazyOutputFormat.cpp b/src/Processors/Formats/LazyOutputFormat.cpp index 4f6b10dd068..dc099765870 100644 --- a/src/Processors/Formats/LazyOutputFormat.cpp +++ b/src/Processors/Formats/LazyOutputFormat.cpp @@ -45,4 +45,8 @@ void LazyOutputFormat::setRowsBeforeLimit(size_t rows_before_limit) info.setRowsBeforeLimit(rows_before_limit); } +void LazyOutputFormat::setRowsBeforeAggregation(size_t rows_before_aggregation) +{ + info.setRowsBeforeAggregation(rows_before_aggregation); +} } diff --git a/src/Processors/Formats/LazyOutputFormat.h b/src/Processors/Formats/LazyOutputFormat.h index c803ed5dc61..5acb6cf3bf3 100644 --- a/src/Processors/Formats/LazyOutputFormat.h +++ b/src/Processors/Formats/LazyOutputFormat.h @@ -28,6 +28,7 @@ public: ProfileInfo & getProfileInfo() { return info; } void setRowsBeforeLimit(size_t rows_before_limit) override; + void setRowsBeforeAggregation(size_t rows_before_aggregation) override; void onCancel() noexcept override { diff --git a/src/Processors/Formats/PullingOutputFormat.cpp b/src/Processors/Formats/PullingOutputFormat.cpp index b2378e62d34..37050fb9675 100644 --- a/src/Processors/Formats/PullingOutputFormat.cpp +++ b/src/Processors/Formats/PullingOutputFormat.cpp @@ -42,5 +42,8 @@ void PullingOutputFormat::setRowsBeforeLimit(size_t rows_before_limit) { info.setRowsBeforeLimit(rows_before_limit); } - +void PullingOutputFormat::setRowsBeforeAggregation(size_t rows_before_aggregation) +{ + info.setRowsBeforeAggregation(rows_before_aggregation); +} } diff --git a/src/Processors/Formats/PullingOutputFormat.h b/src/Processors/Formats/PullingOutputFormat.h index a8efb8dd962..f2546cca180 100644 --- a/src/Processors/Formats/PullingOutputFormat.h +++ b/src/Processors/Formats/PullingOutputFormat.h @@ -22,6 +22,7 @@ public: ProfileInfo & getProfileInfo() { return info; } void setRowsBeforeLimit(size_t rows_before_limit) override; + void setRowsBeforeAggregation(size_t rows_before_aggregation) override; bool expectMaterializedColumns() const override { return false; } diff --git a/src/Processors/IProcessor.cpp b/src/Processors/IProcessor.cpp index edb4d662d8b..fc595a7b565 100644 --- a/src/Processors/IProcessor.cpp +++ b/src/Processors/IProcessor.cpp @@ -55,9 +55,12 @@ void IProcessor::dump() const } -std::string IProcessor::statusToName(Status status) +std::string IProcessor::statusToName(std::optional status) { - switch (status) + if (status == std::nullopt) + return "NotStarted"; + + switch (*status) { case Status::NeedData: return "NeedData"; diff --git a/src/Processors/IProcessor.h b/src/Processors/IProcessor.h index 68415534912..02b8a3daa28 100644 --- a/src/Processors/IProcessor.h +++ b/src/Processors/IProcessor.h @@ -21,8 +21,8 @@ class IQueryPlanStep; struct StorageLimits; using StorageLimitsList = std::list; -class RowsBeforeLimitCounter; -using RowsBeforeLimitCounterPtr = std::shared_ptr; +class RowsBeforeStepCounter; +using RowsBeforeStepCounterPtr = std::shared_ptr; class IProcessor; using ProcessorPtr = std::shared_ptr; @@ -162,7 +162,7 @@ public: ExpandPipeline, }; - static std::string statusToName(Status status); + static std::string statusToName(std::optional status); /** Method 'prepare' is responsible for all cheap ("instantaneous": O(1) of data volume, no wait) calculations. * @@ -377,7 +377,11 @@ public: /// Set rows_before_limit counter for current processor. /// This counter is used to calculate the number of rows right before any filtration of LimitTransform. - virtual void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr /* counter */) {} + virtual void setRowsBeforeLimitCounter(RowsBeforeStepCounterPtr /* counter */) { } + + /// Set rows_before_aggregation counter for current processor. + /// This counter is used to calculate the number of rows right before AggregatingTransform. + virtual void setRowsBeforeAggregationCounter(RowsBeforeStepCounterPtr /* counter */) { } protected: virtual void onCancel() noexcept {} diff --git a/src/Processors/LimitTransform.h b/src/Processors/LimitTransform.h index 33ff968985f..45ae5b0ce81 100644 --- a/src/Processors/LimitTransform.h +++ b/src/Processors/LimitTransform.h @@ -1,8 +1,8 @@ #pragma once -#include -#include #include +#include +#include namespace DB { @@ -30,7 +30,7 @@ private: std::vector sort_column_positions; UInt64 rows_read = 0; /// including the last read block - RowsBeforeLimitCounterPtr rows_before_limit_at_least; + RowsBeforeStepCounterPtr rows_before_limit_at_least; /// State of port's pair. /// Chunks from different port pairs are not mixed for better cache locality. @@ -71,7 +71,7 @@ public: InputPort & getInputPort() { return inputs.front(); } OutputPort & getOutputPort() { return outputs.front(); } - void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) override { rows_before_limit_at_least.swap(counter); } + void setRowsBeforeLimitCounter(RowsBeforeStepCounterPtr counter) override { rows_before_limit_at_least.swap(counter); } void setInputPortHasCounter(size_t pos) { ports_data[pos].input_port_has_counter = true; } }; diff --git a/src/Processors/Merges/AggregatingSortedTransform.h b/src/Processors/Merges/AggregatingSortedTransform.h index c6d7e844c65..c96ad3db525 100644 --- a/src/Processors/Merges/AggregatingSortedTransform.h +++ b/src/Processors/Merges/AggregatingSortedTransform.h @@ -3,6 +3,11 @@ #include #include +namespace ProfileEvents +{ + extern const Event AggregatingSortedMilliseconds; +} + namespace DB { @@ -29,6 +34,11 @@ public: } String getName() const override { return "AggregatingSortedTransform"; } + + void onFinish() override + { + logMergedStats(ProfileEvents::AggregatingSortedMilliseconds, "Aggregated sorted", getLogger("AggregatingSortedTransform")); + } }; } diff --git a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h index 53c103e7038..908994e1851 100644 --- a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h @@ -30,6 +30,8 @@ public: void consume(Input & input, size_t source_num) override; Status merge() override; + MergedStats getMergedStats() const override { return merged_data.getMergedStats(); } + /// Stores information for aggregation of SimpleAggregateFunction columns struct SimpleAggregateDescription { diff --git a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp index 86675bcb237..477566d8a94 100644 --- a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp @@ -126,6 +126,9 @@ IMergingAlgorithm::Status FinishAggregatingInOrderAlgorithm::merge() Chunk FinishAggregatingInOrderAlgorithm::prepareToMerge() { + total_merged_rows += accumulated_rows; + total_merged_bytes += accumulated_bytes; + accumulated_rows = 0; accumulated_bytes = 0; diff --git a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.h b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.h index cc6578e79be..c34028b1cba 100644 --- a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.h +++ b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.h @@ -50,6 +50,8 @@ public: void consume(Input & input, size_t source_num) override; Status merge() override; + MergedStats getMergedStats() const override { return {.bytes = accumulated_bytes, .rows = accumulated_rows, .blocks = chunk_num}; } + private: Chunk prepareToMerge(); void addToAggregation(); @@ -92,6 +94,9 @@ private: UInt64 chunk_num = 0; size_t accumulated_rows = 0; size_t accumulated_bytes = 0; + + size_t total_merged_rows = 0; + size_t total_merged_bytes = 0; }; } diff --git a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h index aaa3859efb6..cb2775c968d 100644 --- a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h @@ -33,6 +33,8 @@ public: const char * getName() const override { return "GraphiteRollupSortedAlgorithm"; } Status merge() override; + MergedStats getMergedStats() const override { return merged_data->getMergedStats(); } + struct ColumnsDefinition { size_t path_column_num; diff --git a/src/Processors/Merges/Algorithms/IMergingAlgorithm.h b/src/Processors/Merges/Algorithms/IMergingAlgorithm.h index 9a1c7c24270..83f11232b71 100644 --- a/src/Processors/Merges/Algorithms/IMergingAlgorithm.h +++ b/src/Processors/Merges/Algorithms/IMergingAlgorithm.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include namespace DB { @@ -65,6 +65,15 @@ public: IMergingAlgorithm() = default; virtual ~IMergingAlgorithm() = default; + + struct MergedStats + { + UInt64 bytes = 0; + UInt64 rows = 0; + UInt64 blocks = 0; + }; + + virtual MergedStats getMergedStats() const = 0; }; // TODO: use when compile with clang which could support it diff --git a/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.h b/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.h index bc1aafe93f7..1725108ac5d 100644 --- a/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.h +++ b/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.h @@ -16,6 +16,8 @@ public: void initialize(Inputs inputs) override; void consume(Input & input, size_t source_num) override; + MergedStats getMergedStats() const override { return merged_data->getMergedStats(); } + private: Block header; SortDescription description; diff --git a/src/Processors/Merges/Algorithms/MergedData.h b/src/Processors/Merges/Algorithms/MergedData.h index c5bb074bb0c..8f47f89d8ee 100644 --- a/src/Processors/Merges/Algorithms/MergedData.h +++ b/src/Processors/Merges/Algorithms/MergedData.h @@ -183,6 +183,8 @@ public: UInt64 totalAllocatedBytes() const { return total_allocated_bytes; } UInt64 maxBlockSize() const { return max_block_size; } + IMergingAlgorithm::MergedStats getMergedStats() const { return {.bytes = total_allocated_bytes, .rows = total_merged_rows, .blocks = total_chunks}; } + virtual ~MergedData() = default; protected: diff --git a/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.h index bcb111baadf..c889668a38e 100644 --- a/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.h @@ -31,7 +31,7 @@ public: void consume(Input & input, size_t source_num) override; Status merge() override; - const MergedData & getMergedData() const { return merged_data; } + MergedStats getMergedStats() const override { return merged_data.getMergedStats(); } private: Block header; diff --git a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp index e2c6371c44f..80c00f91d82 100644 --- a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp @@ -127,14 +127,14 @@ static bool mergeMap(const SummingSortedAlgorithm::MapDescription & desc, Row right(left.size()); for (size_t col_num : desc.key_col_nums) - right[col_num] = (*raw_columns[col_num])[row_number].template get(); + right[col_num] = (*raw_columns[col_num])[row_number].template safeGet(); for (size_t col_num : desc.val_col_nums) - right[col_num] = (*raw_columns[col_num])[row_number].template get(); + right[col_num] = (*raw_columns[col_num])[row_number].template safeGet(); auto at_ith_column_jth_row = [&](const Row & matrix, size_t i, size_t j) -> const Field & { - return matrix[i].get()[j]; + return matrix[i].safeGet()[j]; }; auto tuple_of_nth_columns_at_jth_row = [&](const Row & matrix, const ColumnNumbers & col_nums, size_t j) -> Array @@ -160,7 +160,7 @@ static bool mergeMap(const SummingSortedAlgorithm::MapDescription & desc, auto merge = [&](const Row & matrix) { - size_t rows = matrix[desc.key_col_nums[0]].get().size(); + size_t rows = matrix[desc.key_col_nums[0]].safeGet().size(); for (size_t j = 0; j < rows; ++j) { @@ -190,10 +190,10 @@ static bool mergeMap(const SummingSortedAlgorithm::MapDescription & desc, for (const auto & key_value : merged) { for (size_t col_num_index = 0, size = desc.key_col_nums.size(); col_num_index < size; ++col_num_index) - row[desc.key_col_nums[col_num_index]].get()[row_num] = key_value.first[col_num_index]; + row[desc.key_col_nums[col_num_index]].safeGet()[row_num] = key_value.first[col_num_index]; for (size_t col_num_index = 0, size = desc.val_col_nums.size(); col_num_index < size; ++col_num_index) - row[desc.val_col_nums[col_num_index]].get()[row_num] = key_value.second[col_num_index]; + row[desc.val_col_nums[col_num_index]].safeGet()[row_num] = key_value.second[col_num_index]; ++row_num; } diff --git a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.h index 664b171c4b9..74b4e397831 100644 --- a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.h @@ -30,6 +30,8 @@ public: void consume(Input & input, size_t source_num) override; Status merge() override; + MergedStats getMergedStats() const override { return merged_data.getMergedStats(); } + struct AggregateDescription; struct MapDescription; diff --git a/src/Processors/Merges/CollapsingSortedTransform.h b/src/Processors/Merges/CollapsingSortedTransform.h index 4479ac82f66..99fb700abf1 100644 --- a/src/Processors/Merges/CollapsingSortedTransform.h +++ b/src/Processors/Merges/CollapsingSortedTransform.h @@ -3,6 +3,11 @@ #include #include +namespace ProfileEvents +{ + extern const Event CollapsingSortedMilliseconds; +} + namespace DB { @@ -36,6 +41,11 @@ public: } String getName() const override { return "CollapsingSortedTransform"; } + + void onFinish() override + { + logMergedStats(ProfileEvents::CollapsingSortedMilliseconds, "Collapsed sorted", getLogger("CollapsingSortedTransform")); + } }; } diff --git a/src/Processors/Merges/IMergingTransform.h b/src/Processors/Merges/IMergingTransform.h index be629271736..e5cd3bdde46 100644 --- a/src/Processors/Merges/IMergingTransform.h +++ b/src/Processors/Merges/IMergingTransform.h @@ -2,7 +2,10 @@ #include #include +#include #include +#include +#include namespace DB { @@ -110,6 +113,8 @@ public: void work() override { + Stopwatch watch{CLOCK_MONOTONIC_COARSE}; + if (!state.init_chunks.empty()) algorithm.initialize(std::move(state.init_chunks)); @@ -147,6 +152,8 @@ public: // std::cerr << "Finished" << std::endl; state.is_finished = true; } + + merging_elapsed_ns += watch.elapsedNanoseconds(); } protected: @@ -156,7 +163,33 @@ protected: Algorithm algorithm; /// Profile info. - Stopwatch total_stopwatch {CLOCK_MONOTONIC_COARSE}; + UInt64 merging_elapsed_ns = 0; + + void logMergedStats(ProfileEvents::Event elapsed_ms_event, std::string_view transform_message, LoggerPtr log) const + { + auto stats = algorithm.getMergedStats(); + + UInt64 elapsed_ms = merging_elapsed_ns / 1000000LL; + ProfileEvents::increment(elapsed_ms_event, elapsed_ms); + + /// Don't print info for small parts (< 1M rows) + if (stats.rows < 1000000) + return; + + double seconds = static_cast(merging_elapsed_ns) / 1000000000ULL; + + if (seconds == 0.0) + { + LOG_DEBUG(log, "{}, {} blocks, {} rows, {} bytes in 0 sec.", + transform_message, stats.blocks, stats.rows, stats.bytes); + } + else + { + LOG_DEBUG(log, "{}, {} blocks, {} rows, {} bytes in {} sec., {} rows/sec., {}/sec.", + transform_message, stats.blocks, stats.rows, stats.bytes, + seconds, stats.rows / seconds, ReadableSize(stats.bytes / seconds)); + } + } private: using IMergingTransformBase::state; diff --git a/src/Processors/Merges/MergingSortedTransform.cpp b/src/Processors/Merges/MergingSortedTransform.cpp index 338b1ff7935..d2895a2a2e9 100644 --- a/src/Processors/Merges/MergingSortedTransform.cpp +++ b/src/Processors/Merges/MergingSortedTransform.cpp @@ -1,9 +1,12 @@ #include #include #include - #include -#include + +namespace ProfileEvents +{ + extern const Event MergingSortedMilliseconds; +} namespace DB { @@ -18,7 +21,6 @@ MergingSortedTransform::MergingSortedTransform( UInt64 limit_, bool always_read_till_end_, WriteBuffer * out_row_sources_buf_, - bool quiet_, bool use_average_block_sizes, bool have_all_inputs_) : IMergingTransform( @@ -37,7 +39,6 @@ MergingSortedTransform::MergingSortedTransform( limit_, out_row_sources_buf_, use_average_block_sizes) - , quiet(quiet_) { } @@ -48,22 +49,7 @@ void MergingSortedTransform::onNewInput() void MergingSortedTransform::onFinish() { - if (quiet) - return; - - const auto & merged_data = algorithm.getMergedData(); - - auto log = getLogger("MergingSortedTransform"); - - double seconds = total_stopwatch.elapsedSeconds(); - - if (seconds == 0.0) - LOG_DEBUG(log, "Merge sorted {} blocks, {} rows in 0 sec.", merged_data.totalChunks(), merged_data.totalMergedRows()); - else - LOG_DEBUG(log, "Merge sorted {} blocks, {} rows in {} sec., {} rows/sec., {}/sec", - merged_data.totalChunks(), merged_data.totalMergedRows(), seconds, - merged_data.totalMergedRows() / seconds, - ReadableSize(merged_data.totalAllocatedBytes() / seconds)); + logMergedStats(ProfileEvents::MergingSortedMilliseconds, "Merged sorted", getLogger("MergingSortedTransform")); } } diff --git a/src/Processors/Merges/MergingSortedTransform.h b/src/Processors/Merges/MergingSortedTransform.h index 2b53939f309..6e52450efa7 100644 --- a/src/Processors/Merges/MergingSortedTransform.h +++ b/src/Processors/Merges/MergingSortedTransform.h @@ -21,7 +21,6 @@ public: UInt64 limit_ = 0, bool always_read_till_end_ = false, WriteBuffer * out_row_sources_buf_ = nullptr, - bool quiet_ = false, bool use_average_block_sizes = false, bool have_all_inputs_ = true); @@ -30,9 +29,6 @@ public: protected: void onNewInput() override; void onFinish() override; - -private: - bool quiet = false; }; } diff --git a/src/Processors/Merges/ReplacingSortedTransform.h b/src/Processors/Merges/ReplacingSortedTransform.h index 2657987f161..dc262aab9ee 100644 --- a/src/Processors/Merges/ReplacingSortedTransform.h +++ b/src/Processors/Merges/ReplacingSortedTransform.h @@ -3,6 +3,10 @@ #include #include +namespace ProfileEvents +{ + extern const Event ReplacingSortedMilliseconds; +} namespace DB { @@ -38,6 +42,11 @@ public: } String getName() const override { return "ReplacingSorted"; } + + void onFinish() override + { + logMergedStats(ProfileEvents::ReplacingSortedMilliseconds, "Replaced sorted", getLogger("ReplacingSortedTransform")); + } }; } diff --git a/src/Processors/Merges/SummingSortedTransform.h b/src/Processors/Merges/SummingSortedTransform.h index 70ddebfea95..d7c20223d7e 100644 --- a/src/Processors/Merges/SummingSortedTransform.h +++ b/src/Processors/Merges/SummingSortedTransform.h @@ -3,6 +3,11 @@ #include #include +namespace ProfileEvents +{ + extern const Event SummingSortedMilliseconds; +} + namespace DB { @@ -33,6 +38,11 @@ public: } String getName() const override { return "SummingSortedTransform"; } + + void onFinish() override + { + logMergedStats(ProfileEvents::SummingSortedMilliseconds, "Summed sorted", getLogger("SummingSortedTransform")); + } }; } diff --git a/src/Processors/Merges/VersionedCollapsingTransform.h b/src/Processors/Merges/VersionedCollapsingTransform.h index 18244469bd7..32b5d7bf343 100644 --- a/src/Processors/Merges/VersionedCollapsingTransform.h +++ b/src/Processors/Merges/VersionedCollapsingTransform.h @@ -3,6 +3,10 @@ #include #include +namespace ProfileEvents +{ + extern const Event VersionedCollapsingSortedMilliseconds; +} namespace DB { @@ -33,6 +37,11 @@ public: } String getName() const override { return "VersionedCollapsingTransform"; } + + void onFinish() override + { + logMergedStats(ProfileEvents::VersionedCollapsingSortedMilliseconds, "Versioned collapsed sorted", getLogger("VersionedCollapsingTransform")); + } }; } diff --git a/src/Processors/OffsetTransform.h b/src/Processors/OffsetTransform.h index 79a7d15fe0b..04486a6c940 100644 --- a/src/Processors/OffsetTransform.h +++ b/src/Processors/OffsetTransform.h @@ -1,8 +1,8 @@ #pragma once -#include -#include #include +#include +#include namespace DB { @@ -16,7 +16,7 @@ private: UInt64 offset; UInt64 rows_read = 0; /// including the last read block - RowsBeforeLimitCounterPtr rows_before_limit_at_least; + RowsBeforeStepCounterPtr rows_before_limit_at_least; /// State of port's pair. /// Chunks from different port pairs are not mixed for better cache locality. @@ -45,7 +45,7 @@ public: InputPort & getInputPort() { return inputs.front(); } OutputPort & getOutputPort() { return outputs.front(); } - void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) override { rows_before_limit_at_least.swap(counter); } + void setRowsBeforeLimitCounter(RowsBeforeStepCounterPtr counter) override { rows_before_limit_at_least.swap(counter); } }; } diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp index 52d1931c51e..b31ee7ea53c 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp @@ -255,20 +255,13 @@ static void appendAggregateFunctions( const auto * node = input; - if (node->result_name != aggregate.column_name) - { - if (DataTypeAggregateFunction::strictEquals(type, node->result_type)) - { - node = &proj_dag.addAlias(*node, aggregate.column_name); - } - else - { - /// Cast to aggregate types specified in query if it's not - /// strictly the same as the one specified in projection. This - /// is required to generate correct results during finalization. - node = &proj_dag.addCast(*node, type, aggregate.column_name); - } - } + if (!DataTypeAggregateFunction::strictEquals(type, node->result_type)) + /// Cast to aggregate types specified in query if it's not + /// strictly the same as the one specified in projection. This + /// is required to generate correct results during finalization. + node = &proj_dag.addCast(*node, type, aggregate.column_name); + else if (node->result_name != aggregate.column_name) + node = &proj_dag.addAlias(*node, aggregate.column_name); proj_dag_outputs.push_back(node); } diff --git a/src/Processors/QueryPlan/Optimizations/removeRedundantSorting.cpp b/src/Processors/QueryPlan/Optimizations/removeRedundantSorting.cpp index 7cac7bee6ec..f0094f0f8d2 100644 --- a/src/Processors/QueryPlan/Optimizations/removeRedundantSorting.cpp +++ b/src/Processors/QueryPlan/Optimizations/removeRedundantSorting.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -59,9 +60,10 @@ public: if (typeid_cast(current_step) || typeid_cast(current_step) /// (1) if there are LIMITs on top of ORDER BY, the ORDER BY is non-removable - || typeid_cast(current_step) /// (2) if ORDER BY is with FILL WITH, it is non-removable - || typeid_cast(current_step) /// (3) ORDER BY will change order of previous sorting - || typeid_cast(current_step)) /// (4) aggregation change order + || typeid_cast(current_step) /// (2) OFFSET on top of ORDER BY, the ORDER BY is non-removable + || typeid_cast(current_step) /// (3) if ORDER BY is with FILL WITH, it is non-removable + || typeid_cast(current_step) /// (4) ORDER BY will change order of previous sorting + || typeid_cast(current_step)) /// (5) aggregation change order { logStep("nodes_affect_order/push", current_node); nodes_affect_order.push_back(current_node); diff --git a/src/Processors/QueryPlan/PartsSplitter.cpp b/src/Processors/QueryPlan/PartsSplitter.cpp index a12fce95b10..63c10a11913 100644 --- a/src/Processors/QueryPlan/PartsSplitter.cpp +++ b/src/Processors/QueryPlan/PartsSplitter.cpp @@ -49,7 +49,7 @@ bool isSafePrimaryDataKeyType(const IDataType & data_type) case TypeIndex::Float32: case TypeIndex::Float64: case TypeIndex::Nullable: - case TypeIndex::Object: + case TypeIndex::ObjectDeprecated: return false; case TypeIndex::Array: { diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 901d7c61167..734e67bda24 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -24,8 +24,8 @@ #include #include #include -#include -#include +#include +#include #include #include #include @@ -52,6 +52,8 @@ #include #include +#include "config.h" + using namespace DB; namespace @@ -350,7 +352,15 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas( /// We have a special logic for local replica. It has to read less data, because in some cases it should /// merge states of aggregate functions or do some other important stuff other than reading from Disk. - const auto multiplier = context->getSettingsRef().parallel_replicas_single_task_marks_count_multiplier; + auto multiplier = context->getSettingsRef().parallel_replicas_single_task_marks_count_multiplier; + const auto min_marks_for_concurrent_read_limit = std::numeric_limits::max() >> 1; + if (pool_settings.min_marks_for_concurrent_read > min_marks_for_concurrent_read_limit) + { + /// limit min marks to read in case it's big, happened in test since due to settings randomzation + pool_settings.min_marks_for_concurrent_read = min_marks_for_concurrent_read_limit; + multiplier = 1.0f; + } + if (auto result = pool_settings.min_marks_for_concurrent_read * multiplier; canConvertTo(result)) pool_settings.min_marks_for_concurrent_read = static_cast(result); else @@ -519,7 +529,15 @@ Pipe ReadFromMergeTree::readInOrder( .number_of_current_replica = client_info.number_of_current_replica, }; - const auto multiplier = context->getSettingsRef().parallel_replicas_single_task_marks_count_multiplier; + auto multiplier = context->getSettingsRef().parallel_replicas_single_task_marks_count_multiplier; + const auto min_marks_for_concurrent_read_limit = std::numeric_limits::max() >> 1; + if (pool_settings.min_marks_for_concurrent_read > min_marks_for_concurrent_read_limit) + { + /// limit min marks to read in case it's big, happened in test since due to settings randomzation + pool_settings.min_marks_for_concurrent_read = min_marks_for_concurrent_read_limit; + multiplier = 1.0f; + } + if (auto result = pool_settings.min_marks_for_concurrent_read * multiplier; canConvertTo(result)) pool_settings.min_marks_for_concurrent_read = static_cast(result); else @@ -1474,16 +1492,14 @@ static void buildIndexes( else { MergeTreeIndexConditionPtr condition; - if (index_helper->isVectorSearch()) + if (index_helper->isVectorSimilarityIndex()) { -#ifdef ENABLE_ANNOY - if (const auto * annoy = typeid_cast(index_helper.get())) - condition = annoy->createIndexCondition(query_info, context); -#endif -#ifdef ENABLE_USEARCH - if (const auto * usearch = typeid_cast(index_helper.get())) - condition = usearch->createIndexCondition(query_info, context); +#if USE_USEARCH + if (const auto * vector_similarity_index = typeid_cast(index_helper.get())) + condition = vector_similarity_index->createIndexCondition(query_info, context); #endif + if (const auto * legacy_vector_similarity_index = typeid_cast(index_helper.get())) + condition = legacy_vector_similarity_index->createIndexCondition(query_info, context); if (!condition) throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown vector search index {}", index_helper->index.name); } diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp index 2080e29ceba..596d08845e1 100644 --- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp +++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp @@ -119,23 +119,23 @@ using RangesWithStep = std::vector; std::optional steppedRangeFromRange(const Range & r, UInt64 step, UInt64 remainder) { - if ((r.right.get() == 0) && (!r.right_included)) + if ((r.right.safeGet() == 0) && (!r.right_included)) return std::nullopt; - UInt64 begin = (r.left.get() / step) * step; + UInt64 begin = (r.left.safeGet() / step) * step; if (begin > std::numeric_limits::max() - remainder) return std::nullopt; begin += remainder; - while ((r.left_included <= r.left.get()) && (begin <= r.left.get() - r.left_included)) + while ((r.left_included <= r.left.safeGet()) && (begin <= r.left.safeGet() - r.left_included)) { if (std::numeric_limits::max() - step < begin) return std::nullopt; begin += step; } - if ((begin >= r.right_included) && (begin - r.right_included >= r.right.get())) + if ((begin >= r.right_included) && (begin - r.right_included >= r.right.safeGet())) return std::nullopt; - UInt64 right_edge_included = r.right.get() - (1 - r.right_included); + UInt64 right_edge_included = r.right.safeGet() - (1 - r.right_included); return std::optional{RangeWithStep{begin, step, static_cast(right_edge_included - begin) / step + 1}}; } diff --git a/src/Processors/RowsBeforeLimitCounter.h b/src/Processors/RowsBeforeLimitCounter.h deleted file mode 100644 index f5eb40ff84a..00000000000 --- a/src/Processors/RowsBeforeLimitCounter.h +++ /dev/null @@ -1,36 +0,0 @@ -#pragma once -#include -#include - -namespace DB -{ - -/// This class helps to calculate rows_before_limit_at_least. -class RowsBeforeLimitCounter -{ -public: - void add(uint64_t rows) - { - setAppliedLimit(); - rows_before_limit.fetch_add(rows, std::memory_order_release); - } - - void set(uint64_t rows) - { - setAppliedLimit(); - rows_before_limit.store(rows, std::memory_order_release); - } - - uint64_t get() const { return rows_before_limit.load(std::memory_order_acquire); } - - void setAppliedLimit() { has_applied_limit.store(true, std::memory_order_release); } - bool hasAppliedLimit() const { return has_applied_limit.load(std::memory_order_acquire); } - -private: - std::atomic rows_before_limit = 0; - std::atomic_bool has_applied_limit = false; -}; - -using RowsBeforeLimitCounterPtr = std::shared_ptr; - -} diff --git a/src/Processors/RowsBeforeStepCounter.h b/src/Processors/RowsBeforeStepCounter.h new file mode 100644 index 00000000000..789731f82bd --- /dev/null +++ b/src/Processors/RowsBeforeStepCounter.h @@ -0,0 +1,36 @@ +#pragma once +#include +#include + +namespace DB +{ + +/// This class helps to calculate rows_before_limit_at_least and rows_before_aggregation. +class RowsBeforeStepCounter +{ +public: + void add(uint64_t rows) + { + setAppliedStep(); + rows_before_step.fetch_add(rows, std::memory_order_release); + } + + void set(uint64_t rows) + { + setAppliedStep(); + rows_before_step.store(rows, std::memory_order_release); + } + + uint64_t get() const { return rows_before_step.load(std::memory_order_acquire); } + + void setAppliedStep() { has_applied_step.store(true, std::memory_order_release); } + bool hasAppliedStep() const { return has_applied_step.load(std::memory_order_acquire); } + +private: + std::atomic rows_before_step = 0; + std::atomic_bool has_applied_step = false; +}; + +using RowsBeforeStepCounterPtr = std::shared_ptr; + +} diff --git a/src/Processors/Sources/DelayedSource.cpp b/src/Processors/Sources/DelayedSource.cpp index f7928f89015..788017e3df0 100644 --- a/src/Processors/Sources/DelayedSource.cpp +++ b/src/Processors/Sources/DelayedSource.cpp @@ -139,6 +139,12 @@ void DelayedSource::work() processor->setRowsBeforeLimitCounter(rows_before_limit); } + if (rows_before_aggregation) + { + for (auto & processor : processors) + processor->setRowsBeforeAggregationCounter(rows_before_aggregation); + } + synchronizePorts(totals_output, totals, header, processors); synchronizePorts(extremes_output, extremes, header, processors); } diff --git a/src/Processors/Sources/DelayedSource.h b/src/Processors/Sources/DelayedSource.h index 0b2751e18a6..4ee90e34599 100644 --- a/src/Processors/Sources/DelayedSource.h +++ b/src/Processors/Sources/DelayedSource.h @@ -30,13 +30,15 @@ public: OutputPort * getTotalsPort() { return totals; } OutputPort * getExtremesPort() { return extremes; } - void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) override { rows_before_limit.swap(counter); } + void setRowsBeforeLimitCounter(RowsBeforeStepCounterPtr counter) override { rows_before_limit.swap(counter); } + void setRowsBeforeAggregationCounter(RowsBeforeStepCounterPtr counter) override { rows_before_aggregation.swap(counter); } private: QueryPlanResourceHolder resources; Creator creator; Processors processors; - RowsBeforeLimitCounterPtr rows_before_limit; + RowsBeforeStepCounterPtr rows_before_limit; + RowsBeforeStepCounterPtr rows_before_aggregation; /// Outputs for DelayedSource. OutputPort * main = nullptr; diff --git a/src/Processors/Sources/MySQLSource.cpp b/src/Processors/Sources/MySQLSource.cpp index 5d533a7747e..52be9a6e84a 100644 --- a/src/Processors/Sources/MySQLSource.cpp +++ b/src/Processors/Sources/MySQLSource.cpp @@ -219,11 +219,11 @@ namespace read_bytes_size += 8; break; case ValueType::vtEnum8: - assert_cast(column).insertValue(assert_cast &>(data_type).castToValue(value.data()).get()); + assert_cast(column).insertValue(assert_cast &>(data_type).castToValue(value.data()).safeGet()); read_bytes_size += assert_cast(column).byteSize(); break; case ValueType::vtEnum16: - assert_cast(column).insertValue(assert_cast &>(data_type).castToValue(value.data()).get()); + assert_cast(column).insertValue(assert_cast &>(data_type).castToValue(value.data()).safeGet()); read_bytes_size += assert_cast(column).byteSize(); break; case ValueType::vtString: diff --git a/src/Processors/Sources/PostgreSQLSource.cpp b/src/Processors/Sources/PostgreSQLSource.cpp index a3d6fd691d8..b9bda46bd10 100644 --- a/src/Processors/Sources/PostgreSQLSource.cpp +++ b/src/Processors/Sources/PostgreSQLSource.cpp @@ -35,9 +35,9 @@ PostgreSQLSource::PostgreSQLSource( const Block & sample_block, UInt64 max_block_size_) : ISource(sample_block.cloneEmpty()) - , query_str(query_str_) , max_block_size(max_block_size_) , connection_holder(std::move(connection_holder_)) + , query_str(query_str_) { init(sample_block); } @@ -51,10 +51,10 @@ PostgreSQLSource::PostgreSQLSource( UInt64 max_block_size_, bool auto_commit_) : ISource(sample_block.cloneEmpty()) - , query_str(query_str_) - , tx(std::move(tx_)) , max_block_size(max_block_size_) , auto_commit(auto_commit_) + , query_str(query_str_) + , tx(std::move(tx_)) { init(sample_block); } @@ -204,15 +204,15 @@ PostgreSQLSource::~PostgreSQLSource() */ stream->close(); } - - stream.reset(); - tx.reset(); } catch (...) { tryLogCurrentException(__PRETTY_FUNCTION__); } + stream.reset(); + tx.reset(); + if (connection_holder) connection_holder->setBroken(); } diff --git a/src/Processors/Sources/PostgreSQLSource.h b/src/Processors/Sources/PostgreSQLSource.h index 8a648ae8bb5..319c5d8d7c2 100644 --- a/src/Processors/Sources/PostgreSQLSource.h +++ b/src/Processors/Sources/PostgreSQLSource.h @@ -38,14 +38,12 @@ protected: UInt64 max_block_size_, bool auto_commit_); - String query_str; - std::shared_ptr tx; - std::unique_ptr stream; - Status prepare() override; - void onStart(); Chunk generate() override; + + void onStart(); + void onFinish(); private: @@ -61,6 +59,12 @@ private: postgres::ConnectionHolderPtr connection_holder; std::unordered_map array_info; + +protected: + String query_str; + /// tx and stream must be destroyed before connection_holder. + std::shared_ptr tx; + std::unique_ptr stream; }; diff --git a/src/Processors/Sources/RemoteSource.cpp b/src/Processors/Sources/RemoteSource.cpp index 357c133afa2..c2da5753a27 100644 --- a/src/Processors/Sources/RemoteSource.cpp +++ b/src/Processors/Sources/RemoteSource.cpp @@ -37,16 +37,23 @@ RemoteSource::RemoteSource(RemoteQueryExecutorPtr executor, bool add_aggregation progress(value.read_rows, value.read_bytes); }); - query_executor->setProfileInfoCallback([this](const ProfileInfo & info) - { - if (rows_before_limit) + query_executor->setProfileInfoCallback( + [this](const ProfileInfo & info) { - if (info.hasAppliedLimit()) - rows_before_limit->add(info.getRowsBeforeLimit()); - else - manually_add_rows_before_limit_counter = true; /// Remote subquery doesn't contain a limit - } - }); + if (rows_before_limit) + { + if (info.hasAppliedLimit()) + rows_before_limit->add(info.getRowsBeforeLimit()); + else + manually_add_rows_before_limit_counter = true; /// Remote subquery doesn't contain a limit + } + + if (rows_before_aggregation) + { + if (info.hasAppliedAggregation()) + rows_before_aggregation->add(info.getRowsBeforeAggregation()); + } + }); } RemoteSource::~RemoteSource() = default; @@ -184,7 +191,6 @@ std::optional RemoteSource::tryGenerate() { if (manually_add_rows_before_limit_counter) rows_before_limit->add(rows); - query_executor->finish(); return {}; } diff --git a/src/Processors/Sources/RemoteSource.h b/src/Processors/Sources/RemoteSource.h index 9944b27f734..c9d1a647779 100644 --- a/src/Processors/Sources/RemoteSource.h +++ b/src/Processors/Sources/RemoteSource.h @@ -1,10 +1,10 @@ #pragma once #include -#include +#include #include -#include +#include namespace DB { @@ -25,7 +25,8 @@ public: void work() override; String getName() const override { return "Remote"; } - void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) override { rows_before_limit.swap(counter); } + void setRowsBeforeLimitCounter(RowsBeforeStepCounterPtr counter) override { rows_before_limit.swap(counter); } + void setRowsBeforeAggregationCounter(RowsBeforeStepCounterPtr counter) override { rows_before_aggregation.swap(counter); } /// Stop reading from stream if output port is finished. void onUpdatePorts() override; @@ -46,7 +47,8 @@ private: bool executor_finished = false; bool add_aggregation_info = false; RemoteQueryExecutorPtr query_executor; - RowsBeforeLimitCounterPtr rows_before_limit; + RowsBeforeStepCounterPtr rows_before_limit; + RowsBeforeStepCounterPtr rows_before_aggregation; const bool async_read; const bool async_query_sending; diff --git a/src/Processors/Sources/ShellCommandSource.cpp b/src/Processors/Sources/ShellCommandSource.cpp index 55eaf67eb3b..f55a3713215 100644 --- a/src/Processors/Sources/ShellCommandSource.cpp +++ b/src/Processors/Sources/ShellCommandSource.cpp @@ -8,13 +8,15 @@ #include #include -#include -#include -#include -#include #include +#include +#include +#include +#include + #include +#include namespace DB { @@ -68,11 +70,17 @@ static void makeFdBlocking(int fd) static int pollWithTimeout(pollfd * pfds, size_t num, size_t timeout_milliseconds) { + auto logger = getLogger("TimeoutReadBufferFromFileDescriptor"); + auto describe_fd = [](const auto & pollfd) { return fmt::format("(fd={}, flags={})", pollfd.fd, fcntl(pollfd.fd, F_GETFL)); }; + int res; while (true) { Stopwatch watch; + + LOG_TEST(logger, "Polling descriptors: {}", fmt::join(std::span(pfds, pfds + num) | std::views::transform(describe_fd), ", ")); + res = poll(pfds, static_cast(num), static_cast(timeout_milliseconds)); if (res < 0) @@ -82,7 +90,10 @@ static int pollWithTimeout(pollfd * pfds, size_t num, size_t timeout_millisecond const auto elapsed = watch.elapsedMilliseconds(); if (timeout_milliseconds <= elapsed) + { + LOG_TEST(logger, "Timeout exceeded: elapsed={}, timeout={}", elapsed, timeout_milliseconds); break; + } timeout_milliseconds -= elapsed; } else @@ -91,6 +102,12 @@ static int pollWithTimeout(pollfd * pfds, size_t num, size_t timeout_millisecond } } + LOG_TEST( + logger, + "Poll for descriptors: {} returned {}", + fmt::join(std::span(pfds, pfds + num) | std::views::transform(describe_fd), ", "), + res); + return res; } @@ -200,12 +217,6 @@ public: return true; } - void reset() const - { - makeFdBlocking(stdout_fd); - makeFdBlocking(stderr_fd); - } - ~TimeoutReadBufferFromFileDescriptor() override { tryMakeFdBlocking(stdout_fd); diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.cpp b/src/Processors/Transforms/AggregatingInOrderTransform.cpp index 45b0960ec8f..f8bc419b623 100644 --- a/src/Processors/Transforms/AggregatingInOrderTransform.cpp +++ b/src/Processors/Transforms/AggregatingInOrderTransform.cpp @@ -81,6 +81,8 @@ void AggregatingInOrderTransform::consume(Chunk chunk) is_consume_started = true; } + if (rows_before_aggregation) + rows_before_aggregation->add(rows); src_rows += rows; src_bytes += chunk.bytes(); diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.h b/src/Processors/Transforms/AggregatingInOrderTransform.h index 41a0d7fc7f1..2ac7ffba1aa 100644 --- a/src/Processors/Transforms/AggregatingInOrderTransform.h +++ b/src/Processors/Transforms/AggregatingInOrderTransform.h @@ -45,6 +45,7 @@ public: void work() override; void consume(Chunk chunk); + void setRowsBeforeAggregationCounter(RowsBeforeStepCounterPtr counter) override { rows_before_aggregation.swap(counter); } private: void generate(); @@ -86,6 +87,8 @@ private: Chunk current_chunk; Chunk to_push_chunk; + RowsBeforeStepCounterPtr rows_before_aggregation; + LoggerPtr log = getLogger("AggregatingInOrderTransform"); }; diff --git a/src/Processors/Transforms/AggregatingTransform.cpp b/src/Processors/Transforms/AggregatingTransform.cpp index c09b9567cc1..c9ada32b839 100644 --- a/src/Processors/Transforms/AggregatingTransform.cpp +++ b/src/Processors/Transforms/AggregatingTransform.cpp @@ -8,7 +8,6 @@ #include #include #include - #include @@ -676,7 +675,8 @@ void AggregatingTransform::consume(Chunk chunk) LOG_TRACE(log, "Aggregating"); is_consume_started = true; } - + if (rows_before_aggregation) + rows_before_aggregation->add(num_rows); src_rows += num_rows; src_bytes += chunk.bytes(); diff --git a/src/Processors/Transforms/AggregatingTransform.h b/src/Processors/Transforms/AggregatingTransform.h index 95983c39d1e..b9212375c91 100644 --- a/src/Processors/Transforms/AggregatingTransform.h +++ b/src/Processors/Transforms/AggregatingTransform.h @@ -4,11 +4,13 @@ #include #include #include -#include -#include -#include +#include #include #include +#include +#include +#include + namespace CurrentMetrics { @@ -168,6 +170,7 @@ public: Status prepare() override; void work() override; Processors expandPipeline() override; + void setRowsBeforeAggregationCounter(RowsBeforeStepCounterPtr counter) override { rows_before_aggregation.swap(counter); } protected: void consume(Chunk chunk); @@ -211,6 +214,8 @@ private: bool is_consume_started = false; + RowsBeforeStepCounterPtr rows_before_aggregation; + void initGenerate(); }; diff --git a/src/Processors/Transforms/ColumnGathererTransform.cpp b/src/Processors/Transforms/ColumnGathererTransform.cpp index 15f8355bdc7..52fa42fdb51 100644 --- a/src/Processors/Transforms/ColumnGathererTransform.cpp +++ b/src/Processors/Transforms/ColumnGathererTransform.cpp @@ -1,11 +1,15 @@ #include +#include #include #include #include #include #include -#include +namespace ProfileEvents +{ + extern const Event GatheringColumnMilliseconds; +} namespace DB { @@ -33,6 +37,13 @@ ColumnGathererStream::ColumnGathererStream( throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "There are no streams to gather"); } +void ColumnGathererStream::updateStats(const IColumn & column) +{ + merged_rows += column.size(); + merged_bytes += column.allocatedBytes(); + ++merged_blocks; +} + void ColumnGathererStream::initialize(Inputs inputs) { Columns source_columns; @@ -82,7 +93,9 @@ IMergingAlgorithm::Status ColumnGathererStream::merge() { res.addColumn(source_to_fully_copy->column); } - merged_rows += source_to_fully_copy->size; + + updateStats(*source_to_fully_copy->column); + source_to_fully_copy->pos = source_to_fully_copy->size; source_to_fully_copy = nullptr; return Status(std::move(res)); @@ -96,8 +109,7 @@ IMergingAlgorithm::Status ColumnGathererStream::merge() { next_required_source = 0; Chunk res; - merged_rows += sources.front().column->size(); - merged_bytes += sources.front().column->allocatedBytes(); + updateStats(*sources.front().column); res.addColumn(std::move(sources.front().column)); sources.front().pos = sources.front().size = 0; return Status(std::move(res)); @@ -123,8 +135,8 @@ IMergingAlgorithm::Status ColumnGathererStream::merge() if (source_to_fully_copy && result_column->empty()) { Chunk res; - merged_rows += source_to_fully_copy->column->size(); - merged_bytes += source_to_fully_copy->column->allocatedBytes(); + updateStats(*source_to_fully_copy->column); + if (result_column->hasDynamicStructure()) { auto col = result_column->cloneEmpty(); @@ -140,13 +152,13 @@ IMergingAlgorithm::Status ColumnGathererStream::merge() return Status(std::move(res)); } - auto col = result_column->cloneEmpty(); - result_column.swap(col); + auto return_column = result_column->cloneEmpty(); + result_column.swap(return_column); Chunk res; - merged_rows += col->size(); - merged_bytes += col->allocatedBytes(); - res.addColumn(std::move(col)); + updateStats(*return_column); + + res.addColumn(std::move(return_column)); return Status(std::move(res), row_sources_buf.eof() && !source_to_fully_copy); } @@ -185,31 +197,10 @@ ColumnGathererTransform::ColumnGathererTransform( toString(header.columns())); } -void ColumnGathererTransform::work() -{ - Stopwatch stopwatch; - IMergingTransform::work(); - elapsed_ns += stopwatch.elapsedNanoseconds(); -} - void ColumnGathererTransform::onFinish() { - auto merged_rows = algorithm.getMergedRows(); - auto merged_bytes = algorithm.getMergedRows(); - /// Don't print info for small parts (< 10M rows) - if (merged_rows < 10000000) - return; - - double seconds = static_cast(elapsed_ns) / 1000000000ULL; const auto & column_name = getOutputPort().getHeader().getByPosition(0).name; - - if (seconds == 0.0) - LOG_DEBUG(log, "Gathered column {} ({} bytes/elem.) in 0 sec.", - column_name, static_cast(merged_bytes) / merged_rows); - else - LOG_DEBUG(log, "Gathered column {} ({} bytes/elem.) in {} sec., {} rows/sec., {}/sec.", - column_name, static_cast(merged_bytes) / merged_rows, seconds, - merged_rows / seconds, ReadableSize(merged_bytes / seconds)); + logMergedStats(ProfileEvents::GatheringColumnMilliseconds, fmt::format("Gathered column {}", column_name), log); } } diff --git a/src/Processors/Transforms/ColumnGathererTransform.h b/src/Processors/Transforms/ColumnGathererTransform.h index ec5691316ce..fbc9a6bfcc6 100644 --- a/src/Processors/Transforms/ColumnGathererTransform.h +++ b/src/Processors/Transforms/ColumnGathererTransform.h @@ -72,10 +72,11 @@ public: template void gather(Column & column_res); - UInt64 getMergedRows() const { return merged_rows; } - UInt64 getMergedBytes() const { return merged_bytes; } + MergedStats getMergedStats() const override { return {.bytes = merged_bytes, .rows = merged_rows, .blocks = merged_blocks}; } private: + void updateStats(const IColumn & column); + /// Cache required fields struct Source { @@ -105,6 +106,7 @@ private: ssize_t next_required_source = -1; UInt64 merged_rows = 0; UInt64 merged_bytes = 0; + UInt64 merged_blocks = 0; }; class ColumnGathererTransform final : public IMergingTransform @@ -120,12 +122,8 @@ public: String getName() const override { return "ColumnGathererTransform"; } - void work() override; - protected: void onFinish() override; - UInt64 elapsed_ns = 0; - LoggerPtr log; }; diff --git a/src/Processors/Transforms/FillingTransform.cpp b/src/Processors/Transforms/FillingTransform.cpp index 9601f821cc8..95f4a674ebb 100644 --- a/src/Processors/Transforms/FillingTransform.cpp +++ b/src/Processors/Transforms/FillingTransform.cpp @@ -62,7 +62,7 @@ static FillColumnDescription::StepFunction getStepFunction( case IntervalKind::Kind::NAME: \ return [step, scale, &date_lut](Field & field) { \ field = Add##NAME##sImpl::execute(static_cast(\ - field.get()), static_cast(step), date_lut, utc_time_zone, scale); }; + field.safeGet()), static_cast(step), date_lut, utc_time_zone, scale); }; FOR_EACH_INTERVAL_KIND(DECLARE_CASE) #undef DECLARE_CASE @@ -139,21 +139,21 @@ static bool tryConvertFields(FillColumnDescription & descr, const DataTypePtr & { if (which.isDate() || which.isDate32()) { - Int64 avg_seconds = descr.fill_step.get() * descr.step_kind->toAvgSeconds(); + Int64 avg_seconds = descr.fill_step.safeGet() * descr.step_kind->toAvgSeconds(); if (std::abs(avg_seconds) < 86400) throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, "Value of step is to low ({} seconds). Must be >= 1 day", std::abs(avg_seconds)); } if (which.isDate()) - descr.step_func = getStepFunction(*descr.step_kind, descr.fill_step.get(), DateLUT::instance()); + descr.step_func = getStepFunction(*descr.step_kind, descr.fill_step.safeGet(), DateLUT::instance()); else if (which.isDate32()) - descr.step_func = getStepFunction(*descr.step_kind, descr.fill_step.get(), DateLUT::instance()); + descr.step_func = getStepFunction(*descr.step_kind, descr.fill_step.safeGet(), DateLUT::instance()); else if (const auto * date_time = checkAndGetDataType(type.get())) - descr.step_func = getStepFunction(*descr.step_kind, descr.fill_step.get(), date_time->getTimeZone()); + descr.step_func = getStepFunction(*descr.step_kind, descr.fill_step.safeGet(), date_time->getTimeZone()); else if (const auto * date_time64 = checkAndGetDataType(type.get())) { - const auto & step_dec = descr.fill_step.get &>(); + const auto & step_dec = descr.fill_step.safeGet &>(); Int64 step = DecimalUtils::convertTo(step_dec.getValue(), step_dec.getScale()); static const DateLUTImpl & utc_time_zone = DateLUT::instance("UTC"); @@ -163,7 +163,7 @@ static bool tryConvertFields(FillColumnDescription & descr, const DataTypePtr & case IntervalKind::Kind::NAME: \ descr.step_func = [step, &time_zone = date_time64->getTimeZone()](Field & field) \ { \ - auto field_decimal = field.get>(); \ + auto field_decimal = field.safeGet>(); \ auto res = Add##NAME##sImpl::execute(field_decimal.getValue(), step, time_zone, utc_time_zone, field_decimal.getScale()); \ field = DecimalField(res, field_decimal.getScale()); \ }; \ diff --git a/src/Processors/Transforms/MergeJoinTransform.cpp b/src/Processors/Transforms/MergeJoinTransform.cpp index e96a75d277b..6abfa0fccd0 100644 --- a/src/Processors/Transforms/MergeJoinTransform.cpp +++ b/src/Processors/Transforms/MergeJoinTransform.cpp @@ -511,6 +511,16 @@ void MergeJoinAlgorithm::logElapsed(double seconds) stat.max_blocks_loaded); } +IMergingAlgorithm::MergedStats MergeJoinAlgorithm::getMergedStats() const +{ + return + { + .bytes = stat.num_bytes[0] + stat.num_bytes[1], + .rows = stat.num_rows[0] + stat.num_rows[1], + .blocks = stat.num_blocks[0] + stat.num_blocks[1], + }; +} + static void prepareChunk(Chunk & chunk) { if (!chunk) @@ -547,6 +557,7 @@ void MergeJoinAlgorithm::consume(Input & input, size_t source_num) { stat.num_blocks[source_num] += 1; stat.num_rows[source_num] += input.chunk.getNumRows(); + stat.num_bytes[source_num] += input.chunk.allocatedBytes(); } prepareChunk(input.chunk); @@ -1271,7 +1282,7 @@ MergeJoinTransform::MergeJoinTransform( void MergeJoinTransform::onFinish() { - algorithm.logElapsed(total_stopwatch.elapsedSeconds()); + algorithm.logElapsed(static_cast(merging_elapsed_ns) / 1000000000ULL); } } diff --git a/src/Processors/Transforms/MergeJoinTransform.h b/src/Processors/Transforms/MergeJoinTransform.h index d37a0b9f3ae..8f74974af0f 100644 --- a/src/Processors/Transforms/MergeJoinTransform.h +++ b/src/Processors/Transforms/MergeJoinTransform.h @@ -245,6 +245,8 @@ public: void setAsofInequality(ASOFJoinInequality asof_inequality_); void logElapsed(double seconds); + MergedStats getMergedStats() const override; + private: std::optional handleAnyJoinState(); Status anyJoin(); @@ -280,6 +282,7 @@ private: { size_t num_blocks[2] = {0, 0}; size_t num_rows[2] = {0, 0}; + size_t num_bytes[2] = {0, 0}; size_t max_blocks_loaded = 0; }; diff --git a/src/Processors/Transforms/MergeSortingTransform.cpp b/src/Processors/Transforms/MergeSortingTransform.cpp index ede13b29219..c45192e7118 100644 --- a/src/Processors/Transforms/MergeSortingTransform.cpp +++ b/src/Processors/Transforms/MergeSortingTransform.cpp @@ -185,7 +185,6 @@ void MergeSortingTransform::consume(Chunk chunk) if (!external_merging_sorted) { - bool quiet = false; bool have_all_inputs = false; bool use_average_block_sizes = false; @@ -199,7 +198,6 @@ void MergeSortingTransform::consume(Chunk chunk) limit, /*always_read_till_end_=*/ false, nullptr, - quiet, use_average_block_sizes, have_all_inputs); diff --git a/src/Processors/Transforms/PartialSortingTransform.h b/src/Processors/Transforms/PartialSortingTransform.h index 8f25c93037f..73c490d5b92 100644 --- a/src/Processors/Transforms/PartialSortingTransform.h +++ b/src/Processors/Transforms/PartialSortingTransform.h @@ -1,7 +1,7 @@ #pragma once -#include -#include #include +#include +#include #include namespace DB @@ -20,7 +20,7 @@ public: String getName() const override { return "PartialSortingTransform"; } - void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) override { read_rows.swap(counter); } + void setRowsBeforeLimitCounter(RowsBeforeStepCounterPtr counter) override { read_rows.swap(counter); } protected: void transform(Chunk & chunk) override; @@ -29,7 +29,7 @@ private: const SortDescription description; SortDescriptionWithPositions description_with_positions; const UInt64 limit; - RowsBeforeLimitCounterPtr read_rows; + RowsBeforeStepCounterPtr read_rows; Columns sort_description_threshold_columns; diff --git a/src/Processors/Transforms/PasteJoinTransform.cpp b/src/Processors/Transforms/PasteJoinTransform.cpp index d2fa7eed256..982a347a70f 100644 --- a/src/Processors/Transforms/PasteJoinTransform.cpp +++ b/src/Processors/Transforms/PasteJoinTransform.cpp @@ -58,6 +58,16 @@ static void prepareChunk(Chunk & chunk) chunk.setColumns(std::move(columns), num_rows); } +IMergingAlgorithm::MergedStats PasteJoinAlgorithm::getMergedStats() const +{ + return + { + .bytes = stat.num_bytes[0] + stat.num_bytes[1], + .rows = stat.num_rows[0] + stat.num_rows[1], + .blocks = stat.num_blocks[0] + stat.num_blocks[1], + }; +} + void PasteJoinAlgorithm::initialize(Inputs inputs) { if (inputs.size() != 2) diff --git a/src/Processors/Transforms/PasteJoinTransform.h b/src/Processors/Transforms/PasteJoinTransform.h index 6a7e65ee27c..c184f20362d 100644 --- a/src/Processors/Transforms/PasteJoinTransform.h +++ b/src/Processors/Transforms/PasteJoinTransform.h @@ -35,8 +35,7 @@ public: void initialize(Inputs inputs) override; void consume(Input & input, size_t source_num) override; Status merge() override; - - void logElapsed(double seconds); + MergedStats getMergedStats() const override; private: Chunk createBlockWithDefaults(size_t source_num); @@ -55,6 +54,7 @@ private: { size_t num_blocks[2] = {0, 0}; size_t num_rows[2] = {0, 0}; + size_t num_bytes[2] = {0, 0}; size_t max_blocks_loaded = 0; }; diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index c26cd7cc8c3..bd11aa4cd28 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -85,7 +85,7 @@ static int compareValuesWithOffset(const IColumn * _compared_column, using ValueType = typename ColumnType::ValueType; // Note that the storage type of offset returned by get<> is different, so // we need to specify the type explicitly. - const ValueType offset = static_cast(_offset.get()); + const ValueType offset = static_cast(_offset.safeGet()); assert(offset >= 0); const auto compared_value_data = compared_column->getDataAt(compared_row); @@ -140,7 +140,7 @@ static int compareValuesWithOffsetFloat(const IColumn * _compared_column, _compared_column); const auto * reference_column = assert_cast( _reference_column); - const auto offset = _offset.get(); + const auto offset = _offset.safeGet(); chassert(offset >= 0); const auto compared_value_data = compared_column->getDataAt(compared_row); @@ -609,7 +609,7 @@ void WindowTransform::advanceFrameStartRowsOffset() { // Just recalculate it each time by walking blocks. const auto [moved_row, offset_left] = moveRowNumber(current_row, - window_description.frame.begin_offset.get() + window_description.frame.begin_offset.safeGet() * (window_description.frame.begin_preceding ? -1 : 1)); frame_start = moved_row; @@ -848,7 +848,7 @@ void WindowTransform::advanceFrameEndRowsOffset() // Walk the specified offset from the current row. The "+1" is needed // because the frame_end is a past-the-end pointer. const auto [moved_row, offset_left] = moveRowNumber(current_row, - window_description.frame.end_offset.get() + window_description.frame.end_offset.safeGet() * (window_description.frame.end_preceding ? -1 : 1) + 1); @@ -1157,8 +1157,7 @@ void WindowTransform::appendChunk(Chunk & chunk) // Initialize output columns. for (auto & ws : workspaces) { - if (ws.window_function_impl) - block.casted_columns.push_back(ws.window_function_impl->castColumn(block.input_columns, ws.argument_column_indices)); + block.casted_columns.push_back(ws.window_function_impl ? ws.window_function_impl->castColumn(block.input_columns, ws.argument_column_indices) : nullptr); block.output_columns.push_back(ws.aggregate_function->getResultType() ->createColumn()); @@ -2105,13 +2104,13 @@ namespace throw Exception(ErrorCodes::BAD_ARGUMENTS, "Argument of 'ntile' function must be a constant"); auto type_id = argument_types[0]->getTypeId(); if (type_id == TypeIndex::UInt8) - buckets = arg_col[transform->current_row.row].get(); + buckets = arg_col[transform->current_row.row].safeGet(); else if (type_id == TypeIndex::UInt16) - buckets = arg_col[transform->current_row.row].get(); + buckets = arg_col[transform->current_row.row].safeGet(); else if (type_id == TypeIndex::UInt32) - buckets = arg_col[transform->current_row.row].get(); + buckets = arg_col[transform->current_row.row].safeGet(); else if (type_id == TypeIndex::UInt64) - buckets = arg_col[transform->current_row.row].get(); + buckets = arg_col[transform->current_row.row].safeGet(); if (!buckets) { @@ -2337,22 +2336,9 @@ struct WindowFunctionLagLeadInFrame final : public WindowFunction argument_types[2]->getName()); } - const auto from_name = argument_types[2]->getName(); - const auto to_name = argument_types[0]->getName(); - ColumnsWithTypeAndName arguments + auto get_cast_func = [from = argument_types[2], to = argument_types[0]] { - { argument_types[2], "" }, - { - DataTypeString().createColumnConst(0, to_name), - std::make_shared(), - "" - } - }; - - auto get_cast_func = [&arguments] - { - FunctionOverloadResolverPtr func_builder_cast = createInternalCastOverloadResolver(CastType::accurate, {}); - return func_builder_cast->build(arguments); + return createInternalCast({from, {}}, to, CastType::accurate, {}); }; func_cast = get_cast_func(); @@ -2402,7 +2388,7 @@ struct WindowFunctionLagLeadInFrame final : public WindowFunction { offset = (*current_block.input_columns[ workspace.argument_column_indices[1]])[ - transform->current_row.row].get(); + transform->current_row.row].safeGet(); /// Either overflow or really negative value, both is not acceptable. if (offset < 0) @@ -2488,7 +2474,7 @@ struct WindowFunctionNthValue final : public WindowFunction Int64 offset = (*current_block.input_columns[ workspace.argument_column_indices[1]])[ - transform->current_row.row].get(); + transform->current_row.row].safeGet(); /// Either overflow or really negative value, both is not acceptable. if (offset <= 0) diff --git a/src/Processors/tests/gtest_full_sorting_join.cpp b/src/Processors/tests/gtest_full_sorting_join.cpp index f678d7984e8..befe5e28b5d 100644 --- a/src/Processors/tests/gtest_full_sorting_join.cpp +++ b/src/Processors/tests/gtest_full_sorting_join.cpp @@ -208,6 +208,12 @@ Block executePipeline(QueryPipeline && pipeline) template void assertColumnVectorEq(const typename ColumnVector::Container & expected, const Block & block, const std::string & name) { + if (expected.empty()) + { + ASSERT_TRUE(block.columns() == 0); + return; + } + const auto * actual = typeid_cast *>(block.getByName(name).column.get()); ASSERT_TRUE(actual) << "unexpected column type: " << block.getByName(name).column->dumpStructure() << "expected: " << typeid(ColumnVector).name(); @@ -230,6 +236,12 @@ void assertColumnVectorEq(const typename ColumnVector::Container & expected, template void assertColumnEq(const IColumn & expected, const Block & block, const std::string & name) { + if (expected.empty()) + { + ASSERT_TRUE(block.columns() == 0); + return; + } + const ColumnPtr & actual = block.getByName(name).column; ASSERT_TRUE(checkColumn(*actual)); ASSERT_TRUE(checkColumn(expected)); diff --git a/src/QueryPipeline/ProfileInfo.cpp b/src/QueryPipeline/ProfileInfo.cpp index ee0ff8c69bf..69575939edc 100644 --- a/src/QueryPipeline/ProfileInfo.cpp +++ b/src/QueryPipeline/ProfileInfo.cpp @@ -1,14 +1,14 @@ #include +#include +#include #include #include -#include - namespace DB { -void ProfileInfo::read(ReadBuffer & in) +void ProfileInfo::read(ReadBuffer & in, UInt64 server_revision) { readVarUInt(rows, in); readVarUInt(blocks, in); @@ -16,10 +16,15 @@ void ProfileInfo::read(ReadBuffer & in) readBinary(applied_limit, in); readVarUInt(rows_before_limit, in); readBinary(calculated_rows_before_limit, in); + if (server_revision >= DBMS_MIN_REVISION_WITH_ROWS_BEFORE_AGGREGATION) + { + readBinary(applied_aggregation, in); + readVarUInt(rows_before_aggregation, in); + } } -void ProfileInfo::write(WriteBuffer & out) const +void ProfileInfo::write(WriteBuffer & out, UInt64 client_revision) const { writeVarUInt(rows, out); writeVarUInt(blocks, out); @@ -27,6 +32,11 @@ void ProfileInfo::write(WriteBuffer & out) const writeBinary(hasAppliedLimit(), out); writeVarUInt(getRowsBeforeLimit(), out); writeBinary(calculated_rows_before_limit, out); + if (client_revision >= DBMS_MIN_REVISION_WITH_ROWS_BEFORE_AGGREGATION) + { + writeBinary(hasAppliedAggregation(), out); + writeVarUInt(getRowsBeforeAggregation(), out); + } } @@ -41,6 +51,8 @@ void ProfileInfo::setFrom(const ProfileInfo & rhs, bool skip_block_size_info) applied_limit = rhs.applied_limit; rows_before_limit = rhs.rows_before_limit; calculated_rows_before_limit = rhs.calculated_rows_before_limit; + applied_aggregation = rhs.applied_aggregation; + rows_before_aggregation = rhs.rows_before_aggregation; } @@ -57,6 +69,17 @@ bool ProfileInfo::hasAppliedLimit() const return applied_limit; } +size_t ProfileInfo::getRowsBeforeAggregation() const +{ + return rows_before_aggregation; +} + + +bool ProfileInfo::hasAppliedAggregation() const +{ + return applied_aggregation; +} + void ProfileInfo::update(Block & block) { diff --git a/src/QueryPipeline/ProfileInfo.h b/src/QueryPipeline/ProfileInfo.h index 7a0a0c304e2..92c83c8c3be 100644 --- a/src/QueryPipeline/ProfileInfo.h +++ b/src/QueryPipeline/ProfileInfo.h @@ -32,13 +32,16 @@ struct ProfileInfo size_t getRowsBeforeLimit() const; bool hasAppliedLimit() const; + size_t getRowsBeforeAggregation() const; + bool hasAppliedAggregation() const; + void update(Block & block); void update(size_t num_rows, size_t num_bytes); /// Binary serialization and deserialization of main fields. /// Writes only main fields i.e. fields that required by internal transmission protocol. - void read(ReadBuffer & in); - void write(WriteBuffer & out) const; + void read(ReadBuffer & in, UInt64 server_revision); + void write(WriteBuffer & out, UInt64 client_revision) const; /// Sets main fields from other object (see methods above). /// If skip_block_size_info if true, then rows, bytes and block fields are ignored. @@ -51,11 +54,21 @@ struct ProfileInfo rows_before_limit = rows_before_limit_; } + /// Only for Processors. + void setRowsBeforeAggregation(size_t rows_before_aggregation_) + { + applied_aggregation = true; + rows_before_aggregation = rows_before_aggregation_; + } + private: /// For these fields we make accessors, because they must be calculated beforehand. mutable bool applied_limit = false; /// Whether LIMIT was applied mutable size_t rows_before_limit = 0; - mutable bool calculated_rows_before_limit = false; /// Whether the field rows_before_limit was calculated + mutable bool calculated_rows_before_limit = false; /// Whether the field rows was calculated + + mutable bool applied_aggregation = false; /// Whether GROUP BY was applied + mutable size_t rows_before_aggregation = 0; }; } diff --git a/src/QueryPipeline/QueryPipeline.cpp b/src/QueryPipeline/QueryPipeline.cpp index 935c006c217..c9c0bad7553 100644 --- a/src/QueryPipeline/QueryPipeline.cpp +++ b/src/QueryPipeline/QueryPipeline.cpp @@ -1,15 +1,14 @@ #include #include -#include -#include -#include -#include +#include #include #include -#include -#include -#include +#include +#include +#include +#include +#include #include #include #include @@ -17,15 +16,19 @@ #include #include #include -#include +#include +#include #include +#include #include #include #include #include -#include #include -#include +#include +#include +#include +#include namespace DB @@ -139,7 +142,7 @@ static void checkCompleted(Processors & processors) static void initRowsBeforeLimit(IOutputFormat * output_format) { - RowsBeforeLimitCounterPtr rows_before_limit_at_least; + RowsBeforeStepCounterPtr rows_before_limit_at_least; std::vector processors; std::map> limit_candidates; std::unordered_set visited; @@ -261,7 +264,7 @@ static void initRowsBeforeLimit(IOutputFormat * output_format) if (!processors.empty()) { - rows_before_limit_at_least = std::make_shared(); + rows_before_limit_at_least = std::make_shared(); for (auto & processor : processors) processor->setRowsBeforeLimitCounter(rows_before_limit_at_least); @@ -273,7 +276,28 @@ static void initRowsBeforeLimit(IOutputFormat * output_format) output_format->setRowsBeforeLimitCounter(rows_before_limit_at_least); } } +static void initRowsBeforeAggregation(std::shared_ptr processors, IOutputFormat * output_format) +{ + bool has_aggregation = false; + if (!processors->empty()) + { + RowsBeforeStepCounterPtr rows_before_aggregation = std::make_shared(); + for (const auto & processor : *processors) + { + if (typeid_cast(processor.get()) || typeid_cast(processor.get())) + { + processor->setRowsBeforeAggregationCounter(rows_before_aggregation); + has_aggregation = true; + } + if (typeid_cast(processor.get()) || typeid_cast(processor.get())) + processor->setRowsBeforeAggregationCounter(rows_before_aggregation); + } + if (has_aggregation) + rows_before_aggregation->add(0); + output_format->setRowsBeforeAggregationCounter(rows_before_aggregation); + } +} QueryPipeline::QueryPipeline( QueryPlanResourceHolder resources_, @@ -521,6 +545,14 @@ void QueryPipeline::complete(std::shared_ptr format) extremes = nullptr; initRowsBeforeLimit(format.get()); + for (const auto & context : resources.interpreter_context) + { + if (context->getSettingsRef().rows_before_aggregation) + { + initRowsBeforeAggregation(processors, format.get()); + break; + } + } output_format = format.get(); processors->emplace_back(std::move(format)); diff --git a/src/QueryPipeline/QueryPipelineBuilder.cpp b/src/QueryPipeline/QueryPipelineBuilder.cpp index 803d1686ad7..d276fed60a2 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.cpp +++ b/src/QueryPipeline/QueryPipelineBuilder.cpp @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/QueryPipeline/tests/gtest_blocks_size_merging_streams.cpp b/src/QueryPipeline/tests/gtest_blocks_size_merging_streams.cpp index bc22f249f97..f41a447049c 100644 --- a/src/QueryPipeline/tests/gtest_blocks_size_merging_streams.cpp +++ b/src/QueryPipeline/tests/gtest_blocks_size_merging_streams.cpp @@ -83,7 +83,7 @@ TEST(MergingSortedTest, SimpleBlockSizeTest) EXPECT_EQ(pipe.numOutputPorts(), 3); auto transform = std::make_shared(pipe.getHeader(), pipe.numOutputPorts(), sort_description, - 8192, /*max_block_size_bytes=*/0, SortingQueueStrategy::Batch, 0, false, nullptr, false, true); + 8192, /*max_block_size_bytes=*/0, SortingQueueStrategy::Batch, 0, false, nullptr, true); pipe.addTransform(std::move(transform)); @@ -125,7 +125,7 @@ TEST(MergingSortedTest, MoreInterestingBlockSizes) EXPECT_EQ(pipe.numOutputPorts(), 3); auto transform = std::make_shared(pipe.getHeader(), pipe.numOutputPorts(), sort_description, - 8192, /*max_block_size_bytes=*/0, SortingQueueStrategy::Batch, 0, false, nullptr, false, true); + 8192, /*max_block_size_bytes=*/0, SortingQueueStrategy::Batch, 0, false, nullptr, true); pipe.addTransform(std::move(transform)); diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp index d8a4d7f0e1f..9c8e0c6bf73 100644 --- a/src/Server/GRPCServer.cpp +++ b/src/Server/GRPCServer.cpp @@ -1577,6 +1577,8 @@ namespace stats.set_allocated_bytes(info.bytes); stats.set_applied_limit(info.hasAppliedLimit()); stats.set_rows_before_limit(info.getRowsBeforeLimit()); + stats.set_applied_aggregation(info.hasAppliedAggregation()); + stats.set_rows_before_aggregation(info.getRowsBeforeAggregation()); } void Call::addLogsToResult() diff --git a/src/Server/HTTP/HTTPServerConnection.cpp b/src/Server/HTTP/HTTPServerConnection.cpp index 047db014560..39e066005b9 100644 --- a/src/Server/HTTP/HTTPServerConnection.cpp +++ b/src/Server/HTTP/HTTPServerConnection.cpp @@ -2,6 +2,7 @@ #include #include +#include namespace DB { @@ -97,6 +98,21 @@ void HTTPServerConnection::run() { sendErrorResponse(session, Poco::Net::HTTPResponse::HTTP_BAD_REQUEST); } + catch (const Poco::Net::NetException & e) + { + /// Do not spam logs with messages related to connection reset by peer. + if (e.code() == POCO_ENOTCONN) + { + LOG_DEBUG(LogFrequencyLimiter(getLogger("HTTPServerConnection"), 10), "Connection reset by peer while processing HTTP request: {}", e.message()); + break; + } + + if (session.networkException()) + session.networkException()->rethrow(); + else + throw; + } + catch (const Poco::Exception &) { if (session.networkException()) diff --git a/src/Server/HTTP/HTTPServerResponse.h b/src/Server/HTTP/HTTPServerResponse.h index ac4f52e7766..51f5814556d 100644 --- a/src/Server/HTTP/HTTPServerResponse.h +++ b/src/Server/HTTP/HTTPServerResponse.h @@ -248,6 +248,8 @@ public: void attachRequest(HTTPServerRequest * request_) { request = request_; } + const Poco::Net::HTTPServerSession & getSession() const { return session; } + private: Poco::Net::HTTPServerSession & session; HTTPServerRequest * request = nullptr; diff --git a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp index e2098b284bf..2fcb66ae606 100644 --- a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp +++ b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp @@ -30,7 +30,7 @@ void WriteBufferFromHTTPServerResponse::startSendHeaders() if (add_cors_header) response.set("Access-Control-Allow-Origin", "*"); - setResponseDefaultHeaders(response, keep_alive_timeout); + setResponseDefaultHeaders(response); std::stringstream header; //STYLE_CHECK_ALLOW_STD_STRING_STREAM response.beginWrite(header); @@ -119,12 +119,10 @@ void WriteBufferFromHTTPServerResponse::nextImpl() WriteBufferFromHTTPServerResponse::WriteBufferFromHTTPServerResponse( HTTPServerResponse & response_, bool is_http_method_head_, - UInt64 keep_alive_timeout_, const ProfileEvents::Event & write_event_) : HTTPWriteBuffer(response_.getSocket(), write_event_) , response(response_) , is_http_method_head(is_http_method_head_) - , keep_alive_timeout(keep_alive_timeout_) { } diff --git a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h index a3952b7c553..f0c80f24582 100644 --- a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h +++ b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h @@ -29,7 +29,6 @@ public: WriteBufferFromHTTPServerResponse( HTTPServerResponse & response_, bool is_http_method_head_, - UInt64 keep_alive_timeout_, const ProfileEvents::Event & write_event_ = ProfileEvents::end()); ~WriteBufferFromHTTPServerResponse() override; @@ -91,7 +90,6 @@ private: bool is_http_method_head; bool add_cors_header = false; - size_t keep_alive_timeout = 0; bool initialized = false; diff --git a/src/Server/HTTP/checkHTTPHeader.cpp b/src/Server/HTTP/checkHTTPHeader.cpp new file mode 100644 index 00000000000..812adde022a --- /dev/null +++ b/src/Server/HTTP/checkHTTPHeader.cpp @@ -0,0 +1,22 @@ +#include + +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNEXPECTED_HTTP_HEADERS; +} + +void checkHTTPHeader(const HTTPRequest & request, const String & header_name, const String & expected_value) +{ + if (!request.has(header_name)) + throw Exception(ErrorCodes::UNEXPECTED_HTTP_HEADERS, "No HTTP header {}", header_name); + if (request.get(header_name) != expected_value) + throw Exception(ErrorCodes::UNEXPECTED_HTTP_HEADERS, "HTTP header {} has unexpected value '{}' (instead of '{}')", header_name, request.get(header_name), expected_value); +} + +} diff --git a/src/Server/HTTP/checkHTTPHeader.h b/src/Server/HTTP/checkHTTPHeader.h new file mode 100644 index 00000000000..956599ae66b --- /dev/null +++ b/src/Server/HTTP/checkHTTPHeader.h @@ -0,0 +1,13 @@ +#pragma once + +#include +#include + + +namespace DB +{ + +/// Checks that the HTTP request has a specified header with a specified value. +void checkHTTPHeader(const HTTPRequest & request, const String & header_name, const String & expected_value); + +} diff --git a/src/Server/HTTP/sendExceptionToHTTPClient.cpp b/src/Server/HTTP/sendExceptionToHTTPClient.cpp index 022a763a9a2..07a649dc396 100644 --- a/src/Server/HTTP/sendExceptionToHTTPClient.cpp +++ b/src/Server/HTTP/sendExceptionToHTTPClient.cpp @@ -29,7 +29,7 @@ void sendExceptionToHTTPClient( if (!out) { /// If nothing was sent yet. - WriteBufferFromHTTPServerResponse out_for_message{response, request.getMethod() == HTTPRequest::HTTP_HEAD, DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT}; + WriteBufferFromHTTPServerResponse out_for_message{response, request.getMethod() == HTTPRequest::HTTP_HEAD}; out_for_message.writeln(exception_message); out_for_message.finalize(); @@ -43,7 +43,6 @@ void sendExceptionToHTTPClient( out->position() = out->buffer().begin(); out->writeln(exception_message); - out->finalize(); } } diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index c8a58527f2c..d2bc22e98cc 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -266,7 +266,6 @@ void HTTPHandler::processQuery( std::make_shared( response, request.getMethod() == HTTPRequest::HTTP_HEAD, - context->getServerSettings().keep_alive_timeout.totalSeconds(), write_event); used_output.out = used_output.out_holder; used_output.out_maybe_compressed = used_output.out_holder; @@ -558,7 +557,7 @@ try if (!used_output.out_holder && !used_output.exception_is_written) { /// If nothing was sent yet and we don't even know if we must compress the response. - WriteBufferFromHTTPServerResponse(response, request.getMethod() == HTTPRequest::HTTP_HEAD, DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT).writeln(s); + WriteBufferFromHTTPServerResponse(response, request.getMethod() == HTTPRequest::HTTP_HEAD).writeln(s); } else if (used_output.out_maybe_compressed) { diff --git a/src/Server/HTTPHandlerFactory.cpp b/src/Server/HTTPHandlerFactory.cpp index 5344b2d024b..fc31ad2874e 100644 --- a/src/Server/HTTPHandlerFactory.cpp +++ b/src/Server/HTTPHandlerFactory.cpp @@ -1,18 +1,16 @@ -#include #include #include +#include +#include #include -#include #include #include "HTTPHandler.h" -#include "Server/PrometheusMetricsWriter.h" #include "StaticRequestHandler.h" #include "ReplicasStatusHandler.h" #include "InterserverIOHTTPHandler.h" -#include "PrometheusRequestHandler.h" #include "WebUIRequestHandler.h" @@ -124,7 +122,8 @@ static inline auto createHandlersFactoryFromConfig( } else if (handler_type == "prometheus") { - main_handler_factory->addHandler(createPrometheusHandlerFactory(server, config, async_metrics, prefix + "." + key)); + main_handler_factory->addHandler( + createPrometheusHandlerFactoryForHTTPRule(server, config, prefix + "." + key, async_metrics)); } else if (handler_type == "replicas_status") { @@ -201,10 +200,7 @@ HTTPRequestHandlerFactoryPtr createHandlerFactory(IServer & server, const Poco:: else if (name == "InterserverIOHTTPHandler-factory" || name == "InterserverIOHTTPSHandler-factory") return createInterserverHTTPHandlerFactory(server, name); else if (name == "PrometheusHandler-factory") - { - auto metrics_writer = std::make_shared(config, "prometheus", async_metrics); - return createPrometheusMainHandlerFactory(server, config, metrics_writer, name); - } + return createPrometheusHandlerFactory(server, config, async_metrics, name); throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown HTTP handler factory name."); } @@ -291,20 +287,9 @@ void addDefaultHandlersFactory( ); factory.addHandler(query_handler); - /// We check that prometheus handler will be served on current (default) port. - /// Otherwise it will be created separately, see createHandlerFactory(...). - if (config.has("prometheus") && config.getInt("prometheus.port", 0) == 0) - { - auto writer = std::make_shared(config, "prometheus", async_metrics); - auto creator = [&server, writer] () -> std::unique_ptr - { - return std::make_unique(server, writer); - }; - auto prometheus_handler = std::make_shared>(std::move(creator)); - prometheus_handler->attachStrictPath(config.getString("prometheus.endpoint", "/metrics")); - prometheus_handler->allowGetAndHeadRequest(); + /// createPrometheusHandlerFactoryForHTTPRuleDefaults() can return nullptr if prometheus protocols must not be served on http port. + if (auto prometheus_handler = createPrometheusHandlerFactoryForHTTPRuleDefaults(server, config, async_metrics)) factory.addHandler(prometheus_handler); - } } } diff --git a/src/Server/HTTPHandlerFactory.h b/src/Server/HTTPHandlerFactory.h index b4c32366463..db4bb73cbc4 100644 --- a/src/Server/HTTPHandlerFactory.h +++ b/src/Server/HTTPHandlerFactory.h @@ -1,15 +1,12 @@ #pragma once -#include -#include #include #include #include #include -#include - #include + namespace DB { @@ -19,6 +16,7 @@ namespace ErrorCodes } class IServer; +class AsynchronousMetrics; template class HandlingRuleHTTPHandlerFactory : public HTTPRequestHandlerFactory @@ -126,18 +124,6 @@ HTTPRequestHandlerFactoryPtr createReplicasStatusHandlerFactory(IServer & server const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix); -HTTPRequestHandlerFactoryPtr -createPrometheusHandlerFactory(IServer & server, - const Poco::Util::AbstractConfiguration & config, - AsynchronousMetrics & async_metrics, - const std::string & config_prefix); - -HTTPRequestHandlerFactoryPtr createPrometheusMainHandlerFactory( - IServer & server, - const Poco::Util::AbstractConfiguration & config, - PrometheusMetricsWriterPtr metrics_writer, - const std::string & name); - /// @param server - used in handlers to check IServer::isCancelled() /// @param config - not the same as server.config(), since it can be newer /// @param async_metrics - used for prometheus (in case of prometheus.asynchronous_metrics=true) diff --git a/src/Server/InterserverIOHTTPHandler.cpp b/src/Server/InterserverIOHTTPHandler.cpp index e46021c8e68..59852c79139 100644 --- a/src/Server/InterserverIOHTTPHandler.cpp +++ b/src/Server/InterserverIOHTTPHandler.cpp @@ -87,9 +87,8 @@ void InterserverIOHTTPHandler::handleRequest(HTTPServerRequest & request, HTTPSe response.setChunkedTransferEncoding(true); Output used_output; - const auto keep_alive_timeout = server.context()->getServerSettings().keep_alive_timeout.totalSeconds(); used_output.out = std::make_shared( - response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout, write_event); + response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, write_event); auto finalize_output = [&] { diff --git a/src/Server/PrometheusMetricsWriter.cpp b/src/Server/PrometheusMetricsWriter.cpp index 85eafbe4808..43370116015 100644 --- a/src/Server/PrometheusMetricsWriter.cpp +++ b/src/Server/PrometheusMetricsWriter.cpp @@ -1,13 +1,27 @@ #include "PrometheusMetricsWriter.h" -#include +#include +#include #include #include - -#include +#include #include "config.h" + +#if USE_NURAFT +namespace ProfileEvents +{ + extern const std::vector keeper_profile_events; +} + +namespace CurrentMetrics +{ + extern const std::vector keeper_metrics; +} +#endif + + namespace { @@ -107,100 +121,84 @@ void writeAsyncMetrics(DB::WriteBuffer & wb, const DB::AsynchronousMetricValues } -#if USE_NURAFT -namespace ProfileEvents -{ - extern const std::vector keeper_profile_events; -} - -namespace CurrentMetrics -{ - extern const std::vector keeper_metrics; -} -#endif - namespace DB { -PrometheusMetricsWriter::PrometheusMetricsWriter( - const Poco::Util::AbstractConfiguration & config, const std::string & config_name, - const AsynchronousMetrics & async_metrics_) - : async_metrics(async_metrics_) - , send_events(config.getBool(config_name + ".events", true)) - , send_metrics(config.getBool(config_name + ".metrics", true)) - , send_asynchronous_metrics(config.getBool(config_name + ".asynchronous_metrics", true)) - , send_errors(config.getBool(config_name + ".errors", true)) +void PrometheusMetricsWriter::writeEvents(WriteBuffer & wb) const { + for (ProfileEvents::Event i = ProfileEvents::Event(0), end = ProfileEvents::end(); i < end; ++i) + writeEvent(wb, i); } -void PrometheusMetricsWriter::write(WriteBuffer & wb) const +void PrometheusMetricsWriter::writeMetrics(WriteBuffer & wb) const { - if (send_events) + for (size_t i = 0, end = CurrentMetrics::end(); i < end; ++i) + writeMetric(wb, i); +} + +void PrometheusMetricsWriter::writeAsynchronousMetrics(WriteBuffer & wb, const AsynchronousMetrics & async_metrics) const +{ + writeAsyncMetrics(wb, async_metrics.getValues()); +} + +void PrometheusMetricsWriter::writeErrors(WriteBuffer & wb) const +{ + size_t total_count = 0; + + for (size_t i = 0, end = ErrorCodes::end(); i < end; ++i) { - for (ProfileEvents::Event i = ProfileEvents::Event(0), end = ProfileEvents::end(); i < end; ++i) - writeEvent(wb, i); - } + const auto & error = ErrorCodes::values[i].get(); + std::string_view name = ErrorCodes::getName(static_cast(i)); - if (send_metrics) - { - for (size_t i = 0, end = CurrentMetrics::end(); i < end; ++i) - writeMetric(wb, i); - } + if (name.empty()) + continue; - if (send_asynchronous_metrics) - writeAsyncMetrics(wb, async_metrics.getValues()); + std::string key{error_metrics_prefix + toString(name)}; + std::string help = fmt::format("The number of {} errors since last server restart", name); - if (send_errors) - { - size_t total_count = 0; - - for (size_t i = 0, end = ErrorCodes::end(); i < end; ++i) - { - const auto & error = ErrorCodes::values[i].get(); - std::string_view name = ErrorCodes::getName(static_cast(i)); - - if (name.empty()) - continue; - - std::string key{error_metrics_prefix + toString(name)}; - std::string help = fmt::format("The number of {} errors since last server restart", name); - - writeOutLine(wb, "# HELP", key, help); - writeOutLine(wb, "# TYPE", key, "counter"); - /// We are interested in errors which are happened only on this server. - writeOutLine(wb, key, error.local.count); - - total_count += error.local.count; - } - - /// Write the total number of errors as a separate metric - std::string key{error_metrics_prefix + toString("ALL")}; - writeOutLine(wb, "# HELP", key, "The total number of errors since last server restart"); + writeOutLine(wb, "# HELP", key, help); writeOutLine(wb, "# TYPE", key, "counter"); - writeOutLine(wb, key, total_count); + /// We are interested in errors which are happened only on this server. + writeOutLine(wb, key, error.local.count); + + total_count += error.local.count; } + /// Write the total number of errors as a separate metric + std::string key{error_metrics_prefix + toString("ALL")}; + writeOutLine(wb, "# HELP", key, "The total number of errors since last server restart"); + writeOutLine(wb, "# TYPE", key, "counter"); + writeOutLine(wb, key, total_count); } -void KeeperPrometheusMetricsWriter::write([[maybe_unused]] WriteBuffer & wb) const + +void KeeperPrometheusMetricsWriter::writeEvents([[maybe_unused]] WriteBuffer & wb) const { #if USE_NURAFT - if (send_events) - { - for (auto event : ProfileEvents::keeper_profile_events) - writeEvent(wb, event); - } - - if (send_metrics) - { - for (auto metric : CurrentMetrics::keeper_metrics) - writeMetric(wb, metric); - } - - if (send_asynchronous_metrics) - writeAsyncMetrics(wb, async_metrics.getValues()); + for (auto event : ProfileEvents::keeper_profile_events) + writeEvent(wb, event); #endif } +void KeeperPrometheusMetricsWriter::writeMetrics([[maybe_unused]] WriteBuffer & wb) const +{ +#if USE_NURAFT + for (auto metric : CurrentMetrics::keeper_metrics) + writeMetric(wb, metric); +#endif +} + +void KeeperPrometheusMetricsWriter::writeAsynchronousMetrics([[maybe_unused]] WriteBuffer & wb, + [[maybe_unused]] const AsynchronousMetrics & async_metrics) const +{ +#if USE_NURAFT + writeAsyncMetrics(wb, async_metrics.getValues()); +#endif +} + +void KeeperPrometheusMetricsWriter::writeErrors(WriteBuffer &) const +{ +} + } diff --git a/src/Server/PrometheusMetricsWriter.h b/src/Server/PrometheusMetricsWriter.h index 933ad909ee0..cf2587d80b8 100644 --- a/src/Server/PrometheusMetricsWriter.h +++ b/src/Server/PrometheusMetricsWriter.h @@ -1,44 +1,33 @@ #pragma once -#include - -#include -#include -#include - -#include +#include namespace DB { +class AsynchronousMetrics; +class WriteBuffer; /// Write metrics in Prometheus format class PrometheusMetricsWriter { public: - PrometheusMetricsWriter( - const Poco::Util::AbstractConfiguration & config, const std::string & config_name, - const AsynchronousMetrics & async_metrics_); - - virtual void write(WriteBuffer & wb) const; - virtual ~PrometheusMetricsWriter() = default; -protected: - const AsynchronousMetrics & async_metrics; - const bool send_events; - const bool send_metrics; - const bool send_asynchronous_metrics; - const bool send_errors; + virtual void writeMetrics(WriteBuffer & wb) const; + virtual void writeAsynchronousMetrics(WriteBuffer & wb, const AsynchronousMetrics & async_metrics) const; + virtual void writeEvents(WriteBuffer & wb) const; + virtual void writeErrors(WriteBuffer & wb) const; }; + class KeeperPrometheusMetricsWriter : public PrometheusMetricsWriter { - using PrometheusMetricsWriter::PrometheusMetricsWriter; - - void write(WriteBuffer & wb) const override; +public: + void writeMetrics(WriteBuffer & wb) const override; + void writeAsynchronousMetrics(WriteBuffer & wb, const AsynchronousMetrics & async_metrics) const override; + void writeEvents(WriteBuffer & wb) const override; + void writeErrors(WriteBuffer & wb) const override; }; -using PrometheusMetricsWriterPtr = std::shared_ptr; - } diff --git a/src/Server/PrometheusRequestHandler.cpp b/src/Server/PrometheusRequestHandler.cpp index 87c106c3fc0..ae1fb6d629e 100644 --- a/src/Server/PrometheusRequestHandler.cpp +++ b/src/Server/PrometheusRequestHandler.cpp @@ -1,74 +1,447 @@ #include +#include +#include #include #include -#include +#include #include -#include -#include -#include -#include "Server/PrometheusMetricsWriter.h" +#include +#include "config.h" -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include namespace DB { -void PrometheusRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) + +namespace ErrorCodes { + extern const int BAD_ARGUMENTS; + extern const int SUPPORT_IS_DISABLED; + extern const int LOGICAL_ERROR; +} + +/// Base implementation of a prometheus protocol. +class PrometheusRequestHandler::Impl +{ +public: + explicit Impl(PrometheusRequestHandler & parent) : parent_ref(parent) {} + virtual ~Impl() = default; + virtual void beforeHandlingRequest(HTTPServerRequest & /* request */) {} + virtual void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) = 0; + virtual void onException() {} + +protected: + PrometheusRequestHandler & parent() { return parent_ref; } + IServer & server() { return parent().server; } + const PrometheusRequestHandlerConfig & config() { return parent().config; } + PrometheusMetricsWriter & metrics_writer() { return *parent().metrics_writer; } + LoggerPtr log() { return parent().log; } + WriteBuffer & getOutputStream(HTTPServerResponse & response) { return parent().getOutputStream(response); } + +private: + PrometheusRequestHandler & parent_ref; +}; + + +/// Implementation of the exposing metrics protocol. +class PrometheusRequestHandler::ExposeMetricsImpl : public Impl +{ +public: + explicit ExposeMetricsImpl(PrometheusRequestHandler & parent) : Impl(parent) {} + + void beforeHandlingRequest(HTTPServerRequest & request) override + { + LOG_INFO(log(), "Handling metrics request from {}", request.get("User-Agent")); + chassert(config().type == PrometheusRequestHandlerConfig::Type::ExposeMetrics); + } + + void handleRequest(HTTPServerRequest & /* request */, HTTPServerResponse & response) override + { + response.setContentType("text/plain; version=0.0.4; charset=UTF-8"); + auto & out = getOutputStream(response); + + if (config().expose_events) + metrics_writer().writeEvents(out); + + if (config().expose_metrics) + metrics_writer().writeMetrics(out); + + if (config().expose_asynchronous_metrics) + metrics_writer().writeAsynchronousMetrics(out, parent().async_metrics); + + if (config().expose_errors) + metrics_writer().writeErrors(out); + } +}; + + +/// Base implementation of a protocol with Context and authentication. +class PrometheusRequestHandler::ImplWithContext : public Impl +{ +public: + explicit ImplWithContext(PrometheusRequestHandler & parent) : Impl(parent), default_settings(server().context()->getSettingsRef()) { } + + virtual void handlingRequestWithContext(HTTPServerRequest & request, HTTPServerResponse & response) = 0; + +protected: + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override + { + SCOPE_EXIT({ + request_credentials.reset(); + context.reset(); + session.reset(); + params.reset(); + }); + + params = std::make_unique(default_settings, request); + parent().send_stacktrace = config().is_stacktrace_enabled && params->getParsed("stacktrace", false); + + if (!authenticateUserAndMakeContext(request, response)) + return; /// The user is not authenticated yet, and the HTTP_UNAUTHORIZED response is sent with the "WWW-Authenticate" header, + /// and `request_credentials` must be preserved until the next request or until any exception. + + /// Initialize query scope. + std::optional query_scope; + if (context) + query_scope.emplace(context); + + handlingRequestWithContext(request, response); + } + + bool authenticateUserAndMakeContext(HTTPServerRequest & request, HTTPServerResponse & response) + { + session = std::make_unique(server().context(), ClientInfo::Interface::PROMETHEUS, request.isSecure()); + + if (!authenticateUser(request, response)) + return false; + + makeContext(request); + return true; + } + + bool authenticateUser(HTTPServerRequest & request, HTTPServerResponse & response) + { + return authenticateUserByHTTP(request, *params, response, *session, request_credentials, server().context(), log()); + } + + void makeContext(HTTPServerRequest & request) + { + context = session->makeQueryContext(); + + /// Anything else beside HTTP POST should be readonly queries. + setReadOnlyIfHTTPMethodIdempotent(context, request.getMethod()); + + auto roles = params->getAll("role"); + if (!roles.empty()) + context->setCurrentRoles(roles); + + /// Settings can be overridden in the URL query. + auto is_setting_like_parameter = [&] (const String & name) + { + /// Empty parameter appears when URL like ?&a=b or a=b&&c=d. Just skip them for user's convenience. + if (name.empty()) + return false; + + /// Some parameters (database, default_format, everything used in the code above) do not + /// belong to the Settings class. + static const NameSet reserved_param_names{"user", "password", "quota_key", "stacktrace", "role", "query_id"}; + return !reserved_param_names.contains(name); + }; + + SettingsChanges settings_changes; + for (const auto & [key, value] : *params) + { + if (is_setting_like_parameter(key)) + { + /// This query parameter should be considered as a ClickHouse setting. + settings_changes.push_back({key, value}); + } + } + + context->checkSettingsConstraints(settings_changes, SettingSource::QUERY); + context->applySettingsChanges(settings_changes); + + /// Set the query id supplied by the user, if any, and also update the OpenTelemetry fields. + context->setCurrentQueryId(params->get("query_id", request.get("X-ClickHouse-Query-Id", ""))); + } + + void onException() override + { + // So that the next requests on the connection have to always start afresh in case of exceptions. + request_credentials.reset(); + } + + const Settings & default_settings; + std::unique_ptr params; + std::unique_ptr session; + std::unique_ptr request_credentials; + ContextMutablePtr context; +}; + + +/// Implementation of the remote-write protocol. +class PrometheusRequestHandler::RemoteWriteImpl : public ImplWithContext +{ +public: + using ImplWithContext::ImplWithContext; + + void beforeHandlingRequest(HTTPServerRequest & request) override + { + LOG_INFO(log(), "Handling remote write request from {}", request.get("User-Agent", "")); + chassert(config().type == PrometheusRequestHandlerConfig::Type::RemoteWrite); + } + + void handlingRequestWithContext([[maybe_unused]] HTTPServerRequest & request, [[maybe_unused]] HTTPServerResponse & response) override + { +#if USE_PROMETHEUS_PROTOBUFS + checkHTTPHeader(request, "Content-Type", "application/x-protobuf"); + checkHTTPHeader(request, "Content-Encoding", "snappy"); + + ProtobufZeroCopyInputStreamFromReadBuffer zero_copy_input_stream{ + std::make_unique(wrapReadBufferReference(request.getStream()))}; + + prometheus::WriteRequest write_request; + if (!write_request.ParsePartialFromZeroCopyStream(&zero_copy_input_stream)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot parse WriteRequest"); + + auto table = DatabaseCatalog::instance().getTable(StorageID{config().time_series_table_name}, context); + PrometheusRemoteWriteProtocol protocol{table, context}; + + if (write_request.timeseries_size()) + protocol.writeTimeSeries(write_request.timeseries()); + + if (write_request.metadata_size()) + protocol.writeMetricsMetadata(write_request.metadata()); + + response.setContentType("text/plain; charset=UTF-8"); + response.send(); + +#else + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Prometheus remote write protocol is disabled"); +#endif + } +}; + +/// Implementation of the remote-read protocol. +class PrometheusRequestHandler::RemoteReadImpl : public ImplWithContext +{ +public: + using ImplWithContext::ImplWithContext; + + void beforeHandlingRequest(HTTPServerRequest & request) override + { + LOG_INFO(log(), "Handling remote read request from {}", request.get("User-Agent", "")); + chassert(config().type == PrometheusRequestHandlerConfig::Type::RemoteRead); + } + + void handlingRequestWithContext([[maybe_unused]] HTTPServerRequest & request, [[maybe_unused]] HTTPServerResponse & response) override + { +#if USE_PROMETHEUS_PROTOBUFS + checkHTTPHeader(request, "Content-Type", "application/x-protobuf"); + checkHTTPHeader(request, "Content-Encoding", "snappy"); + + auto table = DatabaseCatalog::instance().getTable(StorageID{config().time_series_table_name}, context); + PrometheusRemoteReadProtocol protocol{table, context}; + + ProtobufZeroCopyInputStreamFromReadBuffer zero_copy_input_stream{ + std::make_unique(wrapReadBufferReference(request.getStream()))}; + + prometheus::ReadRequest read_request; + if (!read_request.ParseFromZeroCopyStream(&zero_copy_input_stream)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot parse ReadRequest"); + + prometheus::ReadResponse read_response; + + size_t num_queries = read_request.queries_size(); + for (size_t i = 0; i != num_queries; ++i) + { + const auto & query = read_request.queries(static_cast(i)); + auto & new_query_result = *read_response.add_results(); + protocol.readTimeSeries( + *new_query_result.mutable_timeseries(), + query.start_timestamp_ms(), + query.end_timestamp_ms(), + query.matchers(), + query.hints()); + } + +# if 0 + LOG_DEBUG(log, "ReadResponse = {}", read_response.DebugString()); +# endif + + response.setContentType("application/x-protobuf"); + response.set("Content-Encoding", "snappy"); + + ProtobufZeroCopyOutputStreamFromWriteBuffer zero_copy_output_stream{std::make_unique(getOutputStream(response))}; + read_response.SerializeToZeroCopyStream(&zero_copy_output_stream); + zero_copy_output_stream.finalize(); + +#else + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Prometheus remote read protocol is disabled"); +#endif + } +}; + + +PrometheusRequestHandler::PrometheusRequestHandler( + IServer & server_, + const PrometheusRequestHandlerConfig & config_, + const AsynchronousMetrics & async_metrics_, + std::shared_ptr metrics_writer_) + : server(server_) + , config(config_) + , async_metrics(async_metrics_) + , metrics_writer(metrics_writer_) + , log(getLogger("PrometheusRequestHandler")) +{ + createImpl(); +} + +PrometheusRequestHandler::~PrometheusRequestHandler() = default; + +void PrometheusRequestHandler::createImpl() +{ + switch (config.type) + { + case PrometheusRequestHandlerConfig::Type::ExposeMetrics: + { + impl = std::make_unique(*this); + return; + } + case PrometheusRequestHandlerConfig::Type::RemoteWrite: + { + impl = std::make_unique(*this); + return; + } + case PrometheusRequestHandlerConfig::Type::RemoteRead: + { + impl = std::make_unique(*this); + return; + } + } + UNREACHABLE(); +} + +void PrometheusRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event_) +{ + setThreadName("PrometheusHndlr"); + try { - const auto & config = server.config(); - unsigned keep_alive_timeout = config.getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT); + response_finalized = false; + write_event = write_event_; + http_method = request.getMethod(); + chassert(!write_buffer_from_response); /// Nothing is written to the response yet. - /// In order to make keep-alive works. + /// Make keep-alive works. if (request.getVersion() == HTTPServerRequest::HTTP_1_1) response.setChunkedTransferEncoding(true); - setResponseDefaultHeaders(response, keep_alive_timeout); + setResponseDefaultHeaders(response); - response.setContentType("text/plain; version=0.0.4; charset=UTF-8"); + impl->beforeHandlingRequest(request); + impl->handleRequest(request, response); - WriteBufferFromHTTPServerResponse wb(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout, write_event); - metrics_writer->write(wb); - wb.finalize(); + finalizeResponse(response); } catch (...) { - tryLogCurrentException("PrometheusRequestHandler"); + tryLogCurrentException(log); + + ExecutionStatus status = ExecutionStatus::fromCurrentException("", send_stacktrace); + trySendExceptionToClient(status.message, status.code, request, response); + tryFinalizeResponse(response); + + tryCallOnException(); } } -HTTPRequestHandlerFactoryPtr createPrometheusHandlerFactory( - IServer & server, - const Poco::Util::AbstractConfiguration & config, - AsynchronousMetrics & async_metrics, - const std::string & config_prefix) +WriteBufferFromHTTPServerResponse & PrometheusRequestHandler::getOutputStream(HTTPServerResponse & response) { - auto writer = std::make_shared(config, config_prefix + ".handler", async_metrics); - auto creator = [&server, writer]() -> std::unique_ptr - { - return std::make_unique(server, writer); - }; - - auto factory = std::make_shared>(std::move(creator)); - factory->addFiltersFromConfig(config, config_prefix); - return factory; + if (response_finalized) + throw Exception(ErrorCodes::LOGICAL_ERROR, "PrometheusRequestHandler: Response already sent"); + if (write_buffer_from_response) + return *write_buffer_from_response; + write_buffer_from_response = std::make_unique( + response, http_method == HTTPRequest::HTTP_HEAD, write_event); + return *write_buffer_from_response; } -HTTPRequestHandlerFactoryPtr createPrometheusMainHandlerFactory( - IServer & server, const Poco::Util::AbstractConfiguration & config, PrometheusMetricsWriterPtr metrics_writer, const std::string & name) +void PrometheusRequestHandler::finalizeResponse(HTTPServerResponse & response) { - auto factory = std::make_shared(name); - auto creator = [&server, metrics_writer] + if (response_finalized) { - return std::make_unique(server, metrics_writer); - }; + /// Response is already finalized or at least tried to. We don't need the write buffer anymore in either case. + write_buffer_from_response = nullptr; + } + else + { + /// We set `response_finalized = true` before actually calling `write_buffer_from_response->finalize()` + /// because we shouldn't call finalize() again even if finalize() throws an exception. + response_finalized = true; - auto handler = std::make_shared>(std::move(creator)); - handler->attachStrictPath(config.getString("prometheus.endpoint", "/metrics")); - handler->allowGetAndHeadRequest(); - factory->addHandler(handler); - return factory; + if (write_buffer_from_response) + std::exchange(write_buffer_from_response, {})->finalize(); + else + WriteBufferFromHTTPServerResponse{response, http_method == HTTPRequest::HTTP_HEAD, write_event}.finalize(); + } + chassert(response_finalized && !write_buffer_from_response); } + +void PrometheusRequestHandler::trySendExceptionToClient(const String & exception_message, int exception_code, HTTPServerRequest & request, HTTPServerResponse & response) +{ + if (response_finalized) + return; /// Response is already finalized (or tried to). We can't write the error message to the response in either case. + + try + { + sendExceptionToHTTPClient(exception_message, exception_code, request, response, &getOutputStream(response), log); + } + catch (...) + { + tryLogCurrentException(log, "Couldn't send exception to client"); + } +} + +void PrometheusRequestHandler::tryFinalizeResponse(HTTPServerResponse & response) +{ + try + { + finalizeResponse(response); + } + catch (...) + { + tryLogCurrentException(log, "Cannot flush data to client (after sending exception)"); + } +} + +void PrometheusRequestHandler::tryCallOnException() +{ + try + { + if (impl) + impl->onException(); + } + catch (...) + { + tryLogCurrentException(log, "onException"); + } +} + } diff --git a/src/Server/PrometheusRequestHandler.h b/src/Server/PrometheusRequestHandler.h index d120752c8c5..281ecf5260e 100644 --- a/src/Server/PrometheusRequestHandler.h +++ b/src/Server/PrometheusRequestHandler.h @@ -1,28 +1,64 @@ #pragma once #include +#include -#include "PrometheusMetricsWriter.h" namespace DB { - +class AsynchronousMetrics; class IServer; +class PrometheusMetricsWriter; +class WriteBufferFromHTTPServerResponse; +/// Handles requests for prometheus protocols (expose_metrics, remote_write, remote-read). class PrometheusRequestHandler : public HTTPRequestHandler { -private: - IServer & server; - PrometheusMetricsWriterPtr metrics_writer; - public: - PrometheusRequestHandler(IServer & server_, PrometheusMetricsWriterPtr metrics_writer_) - : server(server_) - , metrics_writer(std::move(metrics_writer_)) - { - } + PrometheusRequestHandler( + IServer & server_, + const PrometheusRequestHandlerConfig & config_, + const AsynchronousMetrics & async_metrics_, + std::shared_ptr metrics_writer_); + ~PrometheusRequestHandler() override; - void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event_) override; + +private: + /// Creates an internal implementation based on which PrometheusRequestHandlerConfig::Type is used. + void createImpl(); + + /// Returns the write buffer used for the current HTTP response. + WriteBufferFromHTTPServerResponse & getOutputStream(HTTPServerResponse & response); + + /// Finalizes the output stream and sends the response to the client. + void finalizeResponse(HTTPServerResponse & response); + void tryFinalizeResponse(HTTPServerResponse & response); + + /// Writes the current exception to the response. + void trySendExceptionToClient(const String & exception_message, int exception_code, HTTPServerRequest & request, HTTPServerResponse & response); + + /// Calls onException() in a try-catch block. + void tryCallOnException(); + + IServer & server; + const PrometheusRequestHandlerConfig config; + const AsynchronousMetrics & async_metrics; + const std::shared_ptr metrics_writer; + const LoggerPtr log; + + class Impl; + class ImplWithContext; + class ExposeMetricsImpl; + class RemoteWriteImpl; + class RemoteReadImpl; + std::unique_ptr impl; + + String http_method; + bool send_stacktrace = false; + std::unique_ptr write_buffer_from_response; + bool response_finalized = false; + ProfileEvents::Event write_event; }; } diff --git a/src/Server/PrometheusRequestHandlerConfig.h b/src/Server/PrometheusRequestHandlerConfig.h new file mode 100644 index 00000000000..d01d28f702c --- /dev/null +++ b/src/Server/PrometheusRequestHandlerConfig.h @@ -0,0 +1,39 @@ +#pragma once + +#include + + +namespace DB +{ + +/// Configuration of a Prometheus protocol handler after it's parsed from a configuration file. +struct PrometheusRequestHandlerConfig +{ + enum class Type + { + /// Exposes ClickHouse metrics for scraping by Prometheus. + ExposeMetrics, + + /// Handles Prometheus remote-write protocol. + RemoteWrite, + + /// Handles Prometheus remote-read protocol. + RemoteRead, + }; + + Type type = Type::ExposeMetrics; + + /// Settings for type ExposeMetrics: + bool expose_metrics = false; + bool expose_asynchronous_metrics = false; + bool expose_events = false; + bool expose_errors = false; + + /// Settings for types RemoteWrite, RemoteRead: + QualifiedTableName time_series_table_name; + + size_t keep_alive_timeout = 0; + bool is_stacktrace_enabled = true; +}; + +} diff --git a/src/Server/PrometheusRequestHandlerFactory.cpp b/src/Server/PrometheusRequestHandlerFactory.cpp new file mode 100644 index 00000000000..52f1d3b64c1 --- /dev/null +++ b/src/Server/PrometheusRequestHandlerFactory.cpp @@ -0,0 +1,242 @@ +#include + +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNKNOWN_ELEMENT_IN_CONFIG; +} + +namespace +{ + /// Parses common configuration which is attached to any other configuration. The common configuration looks like this: + /// + /// true + /// + /// 30 + void parseCommonConfig(const Poco::Util::AbstractConfiguration & config, PrometheusRequestHandlerConfig & res) + { + res.is_stacktrace_enabled = config.getBool("prometheus.enable_stacktrace", true); + res.keep_alive_timeout = config.getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT); + } + + /// Parses a configuration like this: + /// + /// true + /// true + /// true + /// true + PrometheusRequestHandlerConfig parseExposeMetricsConfig(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) + { + PrometheusRequestHandlerConfig res; + res.type = PrometheusRequestHandlerConfig::Type::ExposeMetrics; + res.expose_metrics = config.getBool(config_prefix + ".metrics", true); + res.expose_asynchronous_metrics = config.getBool(config_prefix + ".asynchronous_metrics", true); + res.expose_events = config.getBool(config_prefix + ".events", true); + res.expose_errors = config.getBool(config_prefix + ".errors", true); + parseCommonConfig(config, res); + return res; + } + + /// Extracts a qualified table name from the config. It can be set either as + /// mydb.prometheus
+ /// or + /// mydb + /// prometheus
+ QualifiedTableName parseTableNameFromConfig(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) + { + QualifiedTableName res; + res.table = config.getString(config_prefix + ".table", "prometheus"); + res.database = config.getString(config_prefix + ".database", ""); + if (res.database.empty()) + res = QualifiedTableName::parseFromString(res.table); + if (res.database.empty()) + res.database = "default"; + return res; + } + + /// Parses a configuration like this: + /// + /// db.time_series_table_name
+ PrometheusRequestHandlerConfig parseRemoteWriteConfig(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) + { + PrometheusRequestHandlerConfig res; + res.type = PrometheusRequestHandlerConfig::Type::RemoteWrite; + res.time_series_table_name = parseTableNameFromConfig(config, config_prefix); + parseCommonConfig(config, res); + return res; + } + + /// Parses a configuration like this: + /// + /// db.time_series_table_name
+ PrometheusRequestHandlerConfig parseRemoteReadConfig(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) + { + PrometheusRequestHandlerConfig res; + res.type = PrometheusRequestHandlerConfig::Type::RemoteRead; + res.time_series_table_name = parseTableNameFromConfig(config, config_prefix); + parseCommonConfig(config, res); + return res; + } + + /// Parses a configuration like this: + /// expose_metrics + /// true + /// true + /// true + /// true + /// -OR- + /// remote_write + /// db.time_series_table_name
+ PrometheusRequestHandlerConfig parseHandlerConfig(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) + { + String type = config.getString(config_prefix + ".type"); + + if (type == "expose_metrics") + return parseExposeMetricsConfig(config, config_prefix); + else if (type == "remote_write") + return parseRemoteWriteConfig(config, config_prefix); + else if (type == "remote_read") + return parseRemoteReadConfig(config, config_prefix); + else + throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG, "Unknown type {} is specified in the configuration for a prometheus protocol", type); + } + + /// Returns true if the protocol represented by a passed config can be handled. + bool canBeHandled(const PrometheusRequestHandlerConfig & config, bool for_keeper) + { + /// The standalone ClickHouse Keeper can only expose its metrics. + /// It can't handle other Prometheus protocols. + return !for_keeper || (config.type == PrometheusRequestHandlerConfig::Type::ExposeMetrics); + } + + /// Creates a writer which serializes exposing metrics. + std::shared_ptr createPrometheusMetricWriter(bool for_keeper) + { + if (for_keeper) + return std::make_unique(); + else + return std::make_unique(); + } + + /// Base function for making a factory for PrometheusRequestHandler. This function can return nullptr. + std::shared_ptr> createPrometheusHandlerFactoryFromConfig( + IServer & server, + const AsynchronousMetrics & async_metrics, + const PrometheusRequestHandlerConfig & config, + bool for_keeper) + { + if (!canBeHandled(config, for_keeper)) + return nullptr; + auto metric_writer = createPrometheusMetricWriter(for_keeper); + auto creator = [&server, &async_metrics, config, metric_writer]() -> std::unique_ptr + { + return std::make_unique(server, config, async_metrics, metric_writer); + }; + return std::make_shared>(std::move(creator)); + } + + /// Generic function for createPrometheusHandlerFactory() and createKeeperPrometheusHandlerFactory(). + HTTPRequestHandlerFactoryPtr createPrometheusHandlerFactoryImpl( + IServer & server, + const Poco::Util::AbstractConfiguration & config, + const AsynchronousMetrics & asynchronous_metrics, + const String & name, + bool for_keeper) + { + auto factory = std::make_shared(name); + + if (config.has("prometheus.handlers")) + { + Strings keys; + config.keys("prometheus.handlers", keys); + for (const String & key : keys) + { + String prefix = "prometheus.handlers." + key; + auto parsed_config = parseHandlerConfig(config, prefix + ".handler"); + if (auto handler = createPrometheusHandlerFactoryFromConfig(server, asynchronous_metrics, parsed_config, for_keeper)) + { + handler->addFiltersFromConfig(config, prefix); + factory->addHandler(handler); + } + } + } + else + { + auto parsed_config = parseExposeMetricsConfig(config, "prometheus"); + if (auto handler = createPrometheusHandlerFactoryFromConfig(server, asynchronous_metrics, parsed_config, for_keeper)) + { + String endpoint = config.getString("prometheus.endpoint", "/metrics"); + handler->attachStrictPath(endpoint); + handler->allowGetAndHeadRequest(); + factory->addHandler(handler); + } + } + + return factory; + } + +} + + +HTTPRequestHandlerFactoryPtr createPrometheusHandlerFactory( + IServer & server, + const Poco::Util::AbstractConfiguration & config, + const AsynchronousMetrics & asynchronous_metrics, + const String & name) +{ + return createPrometheusHandlerFactoryImpl(server, config, asynchronous_metrics, name, /* for_keeper= */ false); +} + + +HTTPRequestHandlerFactoryPtr createPrometheusHandlerFactoryForHTTPRule( + IServer & server, + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix, + const AsynchronousMetrics & asynchronous_metrics) +{ + auto parsed_config = parseExposeMetricsConfig(config, config_prefix + ".handler"); + auto handler = createPrometheusHandlerFactoryFromConfig(server, asynchronous_metrics, parsed_config, /* for_keeper= */ false); + chassert(handler); /// `handler` can't be nullptr here because `for_keeper` is false. + handler->addFiltersFromConfig(config, config_prefix); + return handler; +} + + +HTTPRequestHandlerFactoryPtr createPrometheusHandlerFactoryForHTTPRuleDefaults( + IServer & server, + const Poco::Util::AbstractConfiguration & config, + const AsynchronousMetrics & asynchronous_metrics) +{ + /// The "defaults" HTTP handler should serve the prometheus exposing metrics protocol on the http port + /// only if it isn't already served on its own port and if there is no section. + if (!config.has("prometheus") || config.getInt("prometheus.port", 0) || config.has("prometheus.handlers")) + return nullptr; + + auto parsed_config = parseExposeMetricsConfig(config, "prometheus"); + String endpoint = config.getString("prometheus.endpoint", "/metrics"); + auto handler = createPrometheusHandlerFactoryFromConfig(server, asynchronous_metrics, parsed_config, /* for_keeper= */ false); + chassert(handler); /// `handler` can't be nullptr here because `for_keeper` is false. + handler->attachStrictPath(endpoint); + handler->allowGetAndHeadRequest(); + return handler; +} + + +HTTPRequestHandlerFactoryPtr createKeeperPrometheusHandlerFactory( + IServer & server, + const Poco::Util::AbstractConfiguration & config, + const AsynchronousMetrics & asynchronous_metrics, + const String & name) +{ + return createPrometheusHandlerFactoryImpl(server, config, asynchronous_metrics, name, /* for_keeper= */ true); +} + +} diff --git a/src/Server/PrometheusRequestHandlerFactory.h b/src/Server/PrometheusRequestHandlerFactory.h new file mode 100644 index 00000000000..c52395ca93f --- /dev/null +++ b/src/Server/PrometheusRequestHandlerFactory.h @@ -0,0 +1,130 @@ +#pragma once + +#include +#include + + +namespace Poco::Util { class AbstractConfiguration; } + +namespace DB +{ + +class IServer; +class HTTPRequestHandlerFactory; +using HTTPRequestHandlerFactoryPtr = std::shared_ptr; +class AsynchronousMetrics; + +/// Makes a handler factory to handle prometheus protocols. +/// Expects a configuration like this: +/// +/// +/// 1234 +/// /metric +/// true +/// true +/// true +/// true +/// +/// +/// More prometheus protocols can be supported with using a different configuration +/// (which is similar to the section): +/// +/// +/// 1234 +/// +/// +/// /metrics +/// +/// expose_metrics +/// true +/// true +/// true +/// true +/// +/// +/// +/// +/// +/// An alternative port to serve prometheus protocols can be specified in the section: +/// +/// +/// +/// 4321 +/// prometheus +/// +/// +HTTPRequestHandlerFactoryPtr createPrometheusHandlerFactory( + IServer & server, + const Poco::Util::AbstractConfiguration & config, + const AsynchronousMetrics & asynchronous_metrics, + const String & name); + +/// Makes a HTTP handler factory to handle requests for prometheus metrics for a HTTP rule in the section. +/// Expects a configuration like this: +/// +/// 8123 +/// +/// +/// /metrics +/// +/// prometheus +/// true +/// true +/// true +/// true +/// +/// +/// +/// /write +/// +/// remote_write +/// db.time_series_table_name
+///
+///
+/// +/// /read +/// +/// remote_read +/// db.time_series_table_name
+///
+///
+///
+HTTPRequestHandlerFactoryPtr createPrometheusHandlerFactoryForHTTPRule( + IServer & server, + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix, /// path to "http_handlers.my_handler_1" + const AsynchronousMetrics & asynchronous_metrics); + +/// Makes a HTTP Handler factory to handle requests for prometheus metrics as a part of the default HTTP rule in the section. +/// Expects a configuration like this: +/// +/// 8123 +/// +/// +/// +/// +/// /metric +/// true +/// true +/// true +/// true +/// +/// +/// The "defaults" HTTP handler should serve the prometheus exposing metrics protocol on the http port +/// only if it isn't already served on its own port , +/// and also if there is no section in the configuration +/// (because if that section exists then it must be in charge of how prometheus protocols are handled). +HTTPRequestHandlerFactoryPtr createPrometheusHandlerFactoryForHTTPRuleDefaults( + IServer & server, + const Poco::Util::AbstractConfiguration & config, + const AsynchronousMetrics & asynchronous_metrics); + +/// Makes a handler factory to handle prometheus protocols. +/// Supports the "expose_metrics" protocol only. +HTTPRequestHandlerFactoryPtr createKeeperPrometheusHandlerFactory( + IServer & server, + const Poco::Util::AbstractConfiguration & config, + const AsynchronousMetrics & asynchronous_metrics, + const String & name); + +} diff --git a/src/Server/ReplicasStatusHandler.cpp b/src/Server/ReplicasStatusHandler.cpp index f43357db0a8..419ad635d0d 100644 --- a/src/Server/ReplicasStatusHandler.cpp +++ b/src/Server/ReplicasStatusHandler.cpp @@ -89,8 +89,7 @@ void ReplicasStatusHandler::handleRequest(HTTPServerRequest & request, HTTPServe } } - const auto & server_settings = getContext()->getServerSettings(); - setResponseDefaultHeaders(response, server_settings.keep_alive_timeout.totalSeconds()); + setResponseDefaultHeaders(response); if (!ok) { diff --git a/src/Server/StaticRequestHandler.cpp b/src/Server/StaticRequestHandler.cpp index f3981dea9fb..d8c0765bca4 100644 --- a/src/Server/StaticRequestHandler.cpp +++ b/src/Server/StaticRequestHandler.cpp @@ -35,10 +35,9 @@ namespace ErrorCodes extern const int INVALID_CONFIG_PARAMETER; } -static inline std::unique_ptr -responseWriteBuffer(HTTPServerRequest & request, HTTPServerResponse & response, UInt64 keep_alive_timeout) +static inline std::unique_ptr responseWriteBuffer(HTTPServerRequest & request, HTTPServerResponse & response) { - auto buf = std::unique_ptr(new WriteBufferFromHTTPServerResponse(response, request.getMethod() == HTTPRequest::HTTP_HEAD, keep_alive_timeout)); + auto buf = std::unique_ptr(new WriteBufferFromHTTPServerResponse(response, request.getMethod() == HTTPRequest::HTTP_HEAD)); /// The client can pass a HTTP header indicating supported compression method (gzip or deflate). String http_response_compression_methods = request.get("Accept-Encoding", ""); @@ -91,8 +90,7 @@ static inline void trySendExceptionToClient( void StaticRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & /*write_event*/) { - auto keep_alive_timeout = server.context()->getServerSettings().keep_alive_timeout.totalSeconds(); - auto out = responseWriteBuffer(request, response, keep_alive_timeout); + auto out = responseWriteBuffer(request, response); try { @@ -107,7 +105,7 @@ void StaticRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServer "The Transfer-Encoding is not chunked and there " "is no Content-Length header for POST request"); - setResponseDefaultHeaders(response, keep_alive_timeout); + setResponseDefaultHeaders(response); response.setStatusAndReason(Poco::Net::HTTPResponse::HTTPStatus(status)); writeResponse(*out); } diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 448dfafbd9d..2b9a7295198 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -103,6 +103,7 @@ namespace DB::ErrorCodes extern const int SUPPORT_IS_DISABLED; extern const int UNSUPPORTED_METHOD; extern const int USER_EXPIRED; + extern const int NETWORK_ERROR; } namespace @@ -254,8 +255,8 @@ void TCPHandler::runImpl() socket().setSendTimeout(send_timeout); socket().setNoDelay(true); - in = std::make_shared(socket(), read_event); - out = std::make_shared(socket(), write_event); + in = std::make_shared(socket(), read_event); + out = std::make_shared(socket(), write_event); /// Support for PROXY protocol if (parse_proxy_protocol && !receiveProxyHeader()) @@ -280,6 +281,48 @@ void TCPHandler::runImpl() if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_ADDENDUM) receiveAddendum(); + { + /// Server side of chunked protocol negotiation. + /// Server advertises its protocol capabilities (separate for send and receive channels) by sending + /// in its 'Hello' response one of four types - chunked, notchunked, chunked_optional, notchunked_optional. + /// Not optional types are strict meaning that server only supports this type, optional means that + /// server prefer this type but capable to work in opposite. + /// Client selects which type it is going to communicate based on the settings from config or arguments, + /// and sends either "chunked" or "notchunked" protocol request in addendum section of handshake. + /// Client can detect if server's protocol capabilities are not compatible with client's settings (for example + /// server strictly requires chunked protocol but client's settings only allows notchunked protocol) - in such case + /// client should interrupt this connection. However if client continues with incompatible protocol type request, server + /// will send appropriate exception and disconnect client. + + auto is_chunked = [](const String & chunked_srv_str, const String & chunked_cl_str, const String & direction) + { + bool chunked_srv = chunked_srv_str.starts_with("chunked"); + bool optional_srv = chunked_srv_str.ends_with("_optional"); + bool chunked_cl = chunked_cl_str.starts_with("chunked"); + + if (optional_srv) + return chunked_cl; + + if (chunked_cl != chunked_srv) + throw NetException( + ErrorCodes::NETWORK_ERROR, + "Incompatible protocol: {} is {}, client requested {}", + direction, + chunked_srv ? "chunked" : "notchunked", + chunked_cl ? "chunked" : "notchunked"); + + return chunked_srv; + }; + + bool out_chunked = is_chunked(server.config().getString("proto_caps.send", "notchunked"), proto_recv_chunked_cl, "send"); + bool in_chunked = is_chunked(server.config().getString("proto_caps.recv", "notchunked"), proto_send_chunked_cl, "recv"); + + if (out_chunked) + out->enableChunked(); + if (in_chunked) + in->enableChunked(); + } + if (!is_interserver_mode) { /// If session created, then settings in session context has been updated. @@ -321,7 +364,7 @@ void TCPHandler::runImpl() { Stopwatch idle_time; UInt64 timeout_ms = std::min(poll_interval, idle_connection_timeout) * 1000000; - while (tcp_server.isOpen() && !server.isCancelled() && !static_cast(*in).poll(timeout_ms)) + while (tcp_server.isOpen() && !server.isCancelled() && !in->poll(timeout_ms)) { if (idle_time.elapsedSeconds() > idle_connection_timeout) { @@ -796,7 +839,7 @@ bool TCPHandler::readDataNext() /// We are waiting for a packet from the client. Thus, every `POLL_INTERVAL` seconds check whether we need to shut down. while (true) { - if (static_cast(*in).poll(timeout_us)) + if (in->poll(timeout_us)) { /// If client disconnected. if (in->eof()) @@ -1186,6 +1229,8 @@ void TCPHandler::processTablesStatusRequest() } response.write(*out, client_tcp_protocol_version); + + out->finishChunk(); } void TCPHandler::receiveUnexpectedTablesStatusRequest() @@ -1206,6 +1251,8 @@ void TCPHandler::sendPartUUIDs() writeVarUInt(Protocol::Server::PartUUIDs, *out); writeVectorBinary(uuids, *out); + + out->finishChunk(); out->next(); } } @@ -1214,6 +1261,8 @@ void TCPHandler::sendPartUUIDs() void TCPHandler::sendReadTaskRequestAssumeLocked() { writeVarUInt(Protocol::Server::ReadTaskRequest, *out); + + out->finishChunk(); out->next(); } @@ -1222,6 +1271,8 @@ void TCPHandler::sendMergeTreeAllRangesAnnouncementAssumeLocked(InitialAllRanges { writeVarUInt(Protocol::Server::MergeTreeAllRangesAnnouncement, *out); announcement.serialize(*out); + + out->finishChunk(); out->next(); } @@ -1230,6 +1281,8 @@ void TCPHandler::sendMergeTreeReadTaskRequestAssumeLocked(ParallelReadRequest re { writeVarUInt(Protocol::Server::MergeTreeReadTaskRequest, *out); request.serialize(*out); + + out->finishChunk(); out->next(); } @@ -1237,7 +1290,9 @@ void TCPHandler::sendMergeTreeReadTaskRequestAssumeLocked(ParallelReadRequest re void TCPHandler::sendProfileInfo(const ProfileInfo & info) { writeVarUInt(Protocol::Server::ProfileInfo, *out); - info.write(*out); + info.write(*out, client_tcp_protocol_version); + + out->finishChunk(); out->next(); } @@ -1253,6 +1308,8 @@ void TCPHandler::sendTotals(const Block & totals) state.block_out->write(totals); state.maybe_compressed_out->next(); + + out->finishChunk(); out->next(); } } @@ -1269,6 +1326,8 @@ void TCPHandler::sendExtremes(const Block & extremes) state.block_out->write(extremes); state.maybe_compressed_out->next(); + + out->finishChunk(); out->next(); } } @@ -1286,6 +1345,8 @@ void TCPHandler::sendProfileEvents() writeStringBinary("", *out); state.profile_events_block_out->write(block); + + out->finishChunk(); out->next(); auto elapsed_milliseconds = stopwatch.elapsedMilliseconds(); @@ -1323,6 +1384,8 @@ void TCPHandler::sendTimezone() LOG_DEBUG(log, "TCPHandler::sendTimezone(): {}", tz); writeVarUInt(Protocol::Server::TimezoneUpdate, *out); writeStringBinary(tz, *out); + + out->finishChunk(); out->next(); } @@ -1583,6 +1646,12 @@ void TCPHandler::receiveAddendum() if (!is_interserver_mode) session->setQuotaClientKey(quota_key); + + if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS) + { + readStringBinary(proto_send_chunked_cl, *in); + readStringBinary(proto_recv_chunked_cl, *in); + } } @@ -1616,6 +1685,11 @@ void TCPHandler::sendHello() writeStringBinary(server_display_name, *out); if (client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_VERSION_PATCH) writeVarUInt(VERSION_PATCH, *out); + if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS) + { + writeStringBinary(server.config().getString("proto_caps.send", "notchunked"), *out); + writeStringBinary(server.config().getString("proto_caps.recv", "notchunked"), *out); + } if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_PASSWORD_COMPLEXITY_RULES) { auto rules = server.context()->getAccessControl().getPasswordComplexityRules(); @@ -1668,6 +1742,7 @@ bool TCPHandler::receivePacket() case Protocol::Client::Ping: writeVarUInt(Protocol::Server::Pong, *out); + out->finishChunk(); out->next(); return false; @@ -2197,7 +2272,7 @@ QueryState::CancellationStatus TCPHandler::getQueryCancellationStatus() after_check_cancelled.restart(); /// During request execution the only packet that can come from the client is stopping the query. - if (static_cast(*in).poll(0)) + if (in->poll(0)) { if (in->eof()) { @@ -2248,19 +2323,33 @@ void TCPHandler::sendData(const Block & block) } writeVarUInt(Protocol::Server::Data, *out); - /// Send external table name (empty name is the main table) - writeStringBinary("", *out); /// For testing hedged requests if (block.rows() > 0 && query_context->getSettingsRef().sleep_in_send_data_ms.totalMilliseconds()) { + /// This strange sequence is needed in case of chunked protocol is enabled, in order for client not to + /// hang on receiving of at least packet type - chunk will not be processed unless either chunk footer + /// or chunk continuation header is received - first 'next' is sending starting chunk containing packet type + /// and second 'next' is sending chunk continuation header. + out->next(); + /// Send external table name (empty name is the main table) + writeStringBinary("", *out); out->next(); std::chrono::milliseconds ms(query_context->getSettingsRef().sleep_in_send_data_ms.totalMilliseconds()); std::this_thread::sleep_for(ms); } + else + { + /// Send external table name (empty name is the main table) + writeStringBinary("", *out); + } state.block_out->write(block); - state.maybe_compressed_out->next(); + + if (state.maybe_compressed_out != out) + state.maybe_compressed_out->next(); + + out->finishChunk(); out->next(); } catch (...) @@ -2296,6 +2385,8 @@ void TCPHandler::sendLogData(const Block & block) writeStringBinary("", *out); state.logs_block_out->write(block); + + out->finishChunk(); out->next(); } @@ -2307,6 +2398,7 @@ void TCPHandler::sendTableColumns(const ColumnsDescription & columns) writeStringBinary("", *out); writeStringBinary(columns.toString(), *out); + out->finishChunk(); out->next(); } @@ -2316,6 +2408,8 @@ void TCPHandler::sendException(const Exception & e, bool with_stack_trace) writeVarUInt(Protocol::Server::Exception, *out); writeException(e, *out, with_stack_trace); + + out->finishChunk(); out->next(); } @@ -2326,6 +2420,8 @@ void TCPHandler::sendEndOfStream() state.io.setAllDataSent(); writeVarUInt(Protocol::Server::EndOfStream, *out); + + out->finishChunk(); out->next(); } @@ -2344,6 +2440,8 @@ void TCPHandler::sendProgress() increment.elapsed_ns = current_elapsed_ns - state.prev_elapsed_ns; state.prev_elapsed_ns = current_elapsed_ns; increment.write(*out, client_tcp_protocol_version); + + out->finishChunk(); out->next(); } diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index 74afb5a14a5..dca40e98920 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -18,6 +18,8 @@ #include #include #include +#include +#include #include "Core/Types.h" #include "IServer.h" @@ -186,6 +188,8 @@ private: UInt64 client_version_minor = 0; UInt64 client_version_patch = 0; UInt32 client_tcp_protocol_version = 0; + String proto_send_chunked_cl = "notchunked"; + String proto_recv_chunked_cl = "notchunked"; String quota_key; /// Connection settings, which are extracted from a context. @@ -204,8 +208,8 @@ private: ClientInfo::QueryKind query_kind = ClientInfo::QueryKind::NO_QUERY; /// Streams for reading/writing from/to client connection socket. - std::shared_ptr in; - std::shared_ptr out; + std::shared_ptr in; + std::shared_ptr out; ProfileEvents::Event read_event; ProfileEvents::Event write_event; diff --git a/src/Server/WebUIRequestHandler.cpp b/src/Server/WebUIRequestHandler.cpp index a3d098014e7..c04d7a3f2a0 100644 --- a/src/Server/WebUIRequestHandler.cpp +++ b/src/Server/WebUIRequestHandler.cpp @@ -30,23 +30,20 @@ DashboardWebUIRequestHandler::DashboardWebUIRequestHandler(IServer & server_) : BinaryWebUIRequestHandler::BinaryWebUIRequestHandler(IServer & server_) : server(server_) {} JavaScriptWebUIRequestHandler::JavaScriptWebUIRequestHandler(IServer & server_) : server(server_) {} -static void handle(const IServer & server, HTTPServerRequest & request, HTTPServerResponse & response, std::string_view html) +static void handle(HTTPServerRequest & request, HTTPServerResponse & response, std::string_view html) { - auto keep_alive_timeout = server.context()->getServerSettings().keep_alive_timeout.totalSeconds(); - response.setContentType("text/html; charset=UTF-8"); if (request.getVersion() == HTTPServerRequest::HTTP_1_1) response.setChunkedTransferEncoding(true); - setResponseDefaultHeaders(response, keep_alive_timeout); + setResponseDefaultHeaders(response); response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK); - WriteBufferFromHTTPServerResponse(response, request.getMethod() == HTTPRequest::HTTP_HEAD, keep_alive_timeout).write(html.data(), html.size()); - + WriteBufferFromHTTPServerResponse(response, request.getMethod() == HTTPRequest::HTTP_HEAD).write(html.data(), html.size()); } void PlayWebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event &) { - handle(server, request, response, {reinterpret_cast(gresource_play_htmlData), gresource_play_htmlSize}); + handle(request, response, {reinterpret_cast(gresource_play_htmlData), gresource_play_htmlSize}); } void DashboardWebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event &) @@ -64,23 +61,23 @@ void DashboardWebUIRequestHandler::handleRequest(HTTPServerRequest & request, HT static re2::RE2 lz_string_url = R"(https://[^\s"'`]+lz-string[^\s"'`]*\.js)"; RE2::Replace(&html, lz_string_url, "/js/lz-string.js"); - handle(server, request, response, html); + handle(request, response, html); } void BinaryWebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event &) { - handle(server, request, response, {reinterpret_cast(gresource_binary_htmlData), gresource_binary_htmlSize}); + handle(request, response, {reinterpret_cast(gresource_binary_htmlData), gresource_binary_htmlSize}); } void JavaScriptWebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event &) { if (request.getURI() == "/js/uplot.js") { - handle(server, request, response, {reinterpret_cast(gresource_uplot_jsData), gresource_uplot_jsSize}); + handle(request, response, {reinterpret_cast(gresource_uplot_jsData), gresource_uplot_jsSize}); } else if (request.getURI() == "/js/lz-string.js") { - handle(server, request, response, {reinterpret_cast(gresource_lz_string_jsData), gresource_lz_string_jsSize}); + handle(request, response, {reinterpret_cast(gresource_lz_string_jsData), gresource_lz_string_jsSize}); } else { @@ -88,7 +85,7 @@ void JavaScriptWebUIRequestHandler::handleRequest(HTTPServerRequest & request, H *response.send() << "Not found.\n"; } - handle(server, request, response, {reinterpret_cast(gresource_binary_htmlData), gresource_binary_htmlSize}); + handle(request, response, {reinterpret_cast(gresource_binary_htmlData), gresource_binary_htmlSize}); } } diff --git a/src/Server/grpc_protos/clickhouse_grpc.proto b/src/Server/grpc_protos/clickhouse_grpc.proto index dc17570f833..7836e88f2af 100644 --- a/src/Server/grpc_protos/clickhouse_grpc.proto +++ b/src/Server/grpc_protos/clickhouse_grpc.proto @@ -180,6 +180,8 @@ message Stats { uint64 allocated_bytes = 3; bool applied_limit = 4; uint64 rows_before_limit = 5; + bool applied_aggregation = 6; + uint64 rows_before_aggregation = 7; } message Exception { diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index dfb388ffdb2..d92d8b59f6e 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -110,7 +111,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ if (ast_col_decl.comment) { const auto & ast_comment = typeid_cast(*ast_col_decl.comment); - command.comment = ast_comment.value.get(); + command.comment = ast_comment.value.safeGet(); } if (ast_col_decl.codec) @@ -168,7 +169,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ if (ast_col_decl.comment) { const auto & ast_comment = ast_col_decl.comment->as(); - command.comment.emplace(ast_comment.value.get()); + command.comment.emplace(ast_comment.value.safeGet()); } if (ast_col_decl.ttl) @@ -211,7 +212,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ command.type = COMMENT_COLUMN; command.column_name = getIdentifierName(command_ast->column); const auto & ast_comment = command_ast->comment->as(); - command.comment = ast_comment.value.get(); + command.comment = ast_comment.value.safeGet(); command.if_exists = command_ast->if_exists; return command; } @@ -221,7 +222,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ command.ast = command_ast->clone(); command.type = COMMENT_TABLE; const auto & ast_comment = command_ast->comment->as(); - command.comment = ast_comment.value.get(); + command.comment = ast_comment.value.safeGet(); return command; } else if (command_ast->type == ASTAlterCommand::MODIFY_ORDER_BY) @@ -705,9 +706,9 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) } auto stats_vec = ColumnStatisticsDescription::fromAST(statistics_decl, metadata.columns); - for (const auto & stats : stats_vec) + for (const auto & [stats_column_name, stats] : stats_vec) { - metadata.columns.modify(stats.column_name, + metadata.columns.modify(stats_column_name, [&](ColumnDescription & column) { column.statistics.merge(stats, column.name, column.type, if_not_exists); }); } } @@ -734,14 +735,14 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) { if (!metadata.columns.has(statistics_column_name)) { - throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Cannot add statistics for column {}: this column is not found", statistics_column_name); + throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Cannot modify statistics for column {}: this column is not found", statistics_column_name); } } auto stats_vec = ColumnStatisticsDescription::fromAST(statistics_decl, metadata.columns); - for (const auto & stats : stats_vec) + for (const auto & [stats_column_name, stats] : stats_vec) { - metadata.columns.modify(stats.column_name, + metadata.columns.modify(stats_column_name, [&](ColumnDescription & column) { column.statistics.assign(stats); }); } } @@ -866,8 +867,6 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) rename_visitor.visit(column_to_modify.default_desc.expression); if (column_to_modify.ttl) rename_visitor.visit(column_to_modify.ttl); - if (column_to_modify.name == column_name && !column_to_modify.statistics.empty()) - column_to_modify.statistics.column_name = rename_to; }); } if (metadata.table_ttl.definition_ast) @@ -1403,14 +1402,22 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const const GetColumnsOptions options(GetColumnsOptions::All); const auto old_data_type = all_columns.getColumn(options, column_name).type; - bool new_type_has_object = command.data_type->hasDynamicSubcolumnsDeprecated(); - bool old_type_has_object = old_data_type->hasDynamicSubcolumnsDeprecated(); + bool new_type_has_deprecated_object = command.data_type->hasDynamicSubcolumnsDeprecated(); + bool old_type_has_deprecated_object = old_data_type->hasDynamicSubcolumnsDeprecated(); - if (new_type_has_object || old_type_has_object) + if (new_type_has_deprecated_object || old_type_has_deprecated_object) throw Exception( ErrorCodes::BAD_ARGUMENTS, "The change of data type {} of column {} to {} is not allowed. It has known bugs", old_data_type->getName(), backQuote(column_name), command.data_type->getName()); + + bool has_object_type = isObject(command.data_type); + command.data_type->forEachChild([&](const IDataType & type){ has_object_type |= isObject(type); }); + if (has_object_type) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "The change of data type {} of column {} to {} is not supported.", + old_data_type->getName(), backQuote(column_name), command.data_type->getName()); } if (command.isRemovingProperty()) diff --git a/src/Storages/Cache/ExternalDataSourceCache.cpp b/src/Storages/Cache/ExternalDataSourceCache.cpp index cffb1dc9ca3..8c778fd511a 100644 --- a/src/Storages/Cache/ExternalDataSourceCache.cpp +++ b/src/Storages/Cache/ExternalDataSourceCache.cpp @@ -57,8 +57,15 @@ LocalFileHolder::~LocalFileHolder() { if (original_readbuffer) { - assert_cast(original_readbuffer.get())->seek(0, SEEK_SET); - file_cache_controller->value().startBackgroundDownload(std::move(original_readbuffer), *thread_pool); + try + { + assert_cast(original_readbuffer.get())->seek(0, SEEK_SET); + file_cache_controller->value().startBackgroundDownload(std::move(original_readbuffer), *thread_pool); + } + catch (...) + { + tryLogCurrentException(getLogger("LocalFileHolder"), "Exception during destructor of LocalFileHolder."); + } } } diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index da749812167..0de9fec3bb2 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -113,7 +113,15 @@ bool ColumnDescription::operator==(const ColumnDescription & other) const && ast_to_str(ttl) == ast_to_str(other.ttl); } -void ColumnDescription::writeText(WriteBuffer & buf) const +String formatASTStateAware(IAST & ast, IAST::FormatState & state) +{ + WriteBufferFromOwnString buf; + IAST::FormatSettings settings(buf, true, false); + ast.formatImpl(settings, state, IAST::FormatStateStacked()); + return buf.str(); +} + +void ColumnDescription::writeText(WriteBuffer & buf, IAST::FormatState & state, bool include_comment) const { /// NOTE: Serialization format is insane. @@ -126,20 +134,21 @@ void ColumnDescription::writeText(WriteBuffer & buf) const writeChar('\t', buf); DB::writeText(DB::toString(default_desc.kind), buf); writeChar('\t', buf); - writeEscapedString(queryToString(default_desc.expression), buf); + writeEscapedString(formatASTStateAware(*default_desc.expression, state), buf); } - if (!comment.empty()) + if (!comment.empty() && include_comment) { writeChar('\t', buf); DB::writeText("COMMENT ", buf); - writeEscapedString(queryToString(ASTLiteral(Field(comment))), buf); + auto ast = ASTLiteral(Field(comment)); + writeEscapedString(formatASTStateAware(ast, state), buf); } if (codec) { writeChar('\t', buf); - writeEscapedString(queryToString(codec), buf); + writeEscapedString(formatASTStateAware(*codec, state), buf); } if (!settings.empty()) @@ -150,21 +159,21 @@ void ColumnDescription::writeText(WriteBuffer & buf) const ASTSetQuery ast; ast.is_standalone = false; ast.changes = settings; - writeEscapedString(queryToString(ast), buf); + writeEscapedString(formatASTStateAware(ast, state), buf); DB::writeText(")", buf); } if (!statistics.empty()) { writeChar('\t', buf); - writeEscapedString(queryToString(statistics.getAST()), buf); + writeEscapedString(formatASTStateAware(*statistics.getAST(), state), buf); } if (ttl) { writeChar('\t', buf); DB::writeText("TTL ", buf); - writeEscapedString(queryToString(ttl), buf); + writeEscapedString(formatASTStateAware(*ttl, state), buf); } writeChar('\n', buf); @@ -197,7 +206,7 @@ void ColumnDescription::readText(ReadBuffer & buf) } if (col_ast->comment) - comment = col_ast->comment->as().value.get(); + comment = col_ast->comment->as().value.safeGet(); if (col_ast->codec) codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(col_ast->codec, type, false, true, true, true); @@ -209,11 +218,7 @@ void ColumnDescription::readText(ReadBuffer & buf) settings = col_ast->settings->as().changes; if (col_ast->statistics_desc) - { statistics = ColumnStatisticsDescription::fromColumnDeclaration(*col_ast, type); - /// every column has name `x` here, so we have to set the name manually. - statistics.column_name = name; - } } else throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Cannot parse column description"); @@ -895,16 +900,17 @@ void ColumnsDescription::resetColumnTTLs() } -String ColumnsDescription::toString() const +String ColumnsDescription::toString(bool include_comments) const { WriteBufferFromOwnString buf; + IAST::FormatState ast_format_state; writeCString("columns format version: 1\n", buf); DB::writeText(columns.size(), buf); writeCString(" columns:\n", buf); for (const ColumnDescription & column : columns) - column.writeText(buf); + column.writeText(buf, ast_format_state, include_comments); return buf.str(); } diff --git a/src/Storages/ColumnsDescription.h b/src/Storages/ColumnsDescription.h index f0760160f0a..c89c26501e8 100644 --- a/src/Storages/ColumnsDescription.h +++ b/src/Storages/ColumnsDescription.h @@ -104,7 +104,7 @@ struct ColumnDescription bool operator==(const ColumnDescription & other) const; bool operator!=(const ColumnDescription & other) const { return !(*this == other); } - void writeText(WriteBuffer & buf) const; + void writeText(WriteBuffer & buf, IAST::FormatState & state, bool include_comment) const; void readText(ReadBuffer & buf); }; @@ -137,7 +137,7 @@ public: /// NOTE Must correspond with Nested::flatten function. void flattenNested(); /// TODO: remove, insert already flattened Nested columns. - bool operator==(const ColumnsDescription & other) const { return columns == other.columns; } + bool operator==(const ColumnsDescription & other) const { return toString(false) == other.toString(false); } bool operator!=(const ColumnsDescription & other) const { return !(*this == other); } auto begin() const { return columns.begin(); } @@ -221,7 +221,7 @@ public: /// Does column has non default specified compression codec bool hasCompressionCodec(const String & column_name) const; - String toString() const; + String toString(bool include_comments = true) const; static ColumnsDescription parse(const String & str); size_t size() const diff --git a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp index fdb4cfcb371..7616b384860 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp @@ -273,6 +273,8 @@ ConnectionPoolWithFailoverPtr DistributedAsyncInsertDirectoryQueue::createPool(c address.default_database, address.user, address.password, + address.proto_send_chunked, + address.proto_recv_chunked, address.quota_key, address.cluster, address.cluster_secret, diff --git a/src/Storages/ExternalDataSourceConfiguration.cpp b/src/Storages/ExternalDataSourceConfiguration.cpp deleted file mode 100644 index 41979f8d91c..00000000000 --- a/src/Storages/ExternalDataSourceConfiguration.cpp +++ /dev/null @@ -1,288 +0,0 @@ -#include "ExternalDataSourceConfiguration.h" - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - -IMPLEMENT_SETTINGS_TRAITS(EmptySettingsTraits, EMPTY_SETTINGS) - -static const std::unordered_set dictionary_allowed_keys = { - "host", "port", "user", "password", "quota_key", "db", - "database", "table", "schema", "replica", - "update_field", "update_lag", "invalidate_query", "query", - "where", "name", "secure", "uri", "collection"}; - - -template -SettingsChanges getSettingsChangesFromConfig( - const BaseSettings & settings, const Poco::Util::AbstractConfiguration & config, const String & config_prefix) -{ - SettingsChanges config_settings; - for (const auto & setting : settings.all()) - { - const auto & setting_name = setting.getName(); - auto setting_value = config.getString(config_prefix + '.' + setting_name, ""); - if (!setting_value.empty()) - config_settings.emplace_back(setting_name, setting_value); - } - return config_settings; -} - - -String ExternalDataSourceConfiguration::toString() const -{ - WriteBufferFromOwnString configuration_info; - configuration_info << "username: " << username << "\t"; - if (addresses.empty()) - { - configuration_info << "host: " << host << "\t"; - configuration_info << "port: " << port << "\t"; - } - else - { - for (const auto & [replica_host, replica_port] : addresses) - { - configuration_info << "host: " << replica_host << "\t"; - configuration_info << "port: " << replica_port << "\t"; - } - } - return configuration_info.str(); -} - - -void ExternalDataSourceConfiguration::set(const ExternalDataSourceConfiguration & conf) -{ - host = conf.host; - port = conf.port; - username = conf.username; - password = conf.password; - quota_key = conf.quota_key; - database = conf.database; - table = conf.table; - schema = conf.schema; - addresses = conf.addresses; - addresses_expr = conf.addresses_expr; -} - - -static void validateConfigKeys( - const Poco::Util::AbstractConfiguration & dict_config, const String & config_prefix, HasConfigKeyFunc has_config_key_func) -{ - Poco::Util::AbstractConfiguration::Keys config_keys; - dict_config.keys(config_prefix, config_keys); - for (const auto & config_key : config_keys) - { - if (!has_config_key_func(config_key)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected key `{}` in dictionary source configuration", config_key); - } -} - -template -std::optional getExternalDataSourceConfiguration( - const Poco::Util::AbstractConfiguration & dict_config, const String & dict_config_prefix, - ContextPtr context, HasConfigKeyFunc has_config_key, const BaseSettings & settings) -{ - validateConfigKeys(dict_config, dict_config_prefix, has_config_key); - ExternalDataSourceConfiguration configuration; - - auto collection_name = dict_config.getString(dict_config_prefix + ".name", ""); - if (!collection_name.empty()) - { - const auto & config = context->getConfigRef(); - const auto & collection_prefix = fmt::format("named_collections.{}", collection_name); - validateConfigKeys(dict_config, collection_prefix, has_config_key); - auto config_settings = getSettingsChangesFromConfig(settings, config, collection_prefix); - auto dict_settings = getSettingsChangesFromConfig(settings, dict_config, dict_config_prefix); - /// dictionary config settings override collection settings. - config_settings.insert(config_settings.end(), dict_settings.begin(), dict_settings.end()); - - if (!config.has(collection_prefix)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no collection named `{}` in config", collection_name); - - configuration.host = dict_config.getString(dict_config_prefix + ".host", config.getString(collection_prefix + ".host", "")); - configuration.port = dict_config.getInt(dict_config_prefix + ".port", config.getUInt(collection_prefix + ".port", 0)); - configuration.username = dict_config.getString(dict_config_prefix + ".user", config.getString(collection_prefix + ".user", "")); - configuration.password = dict_config.getString(dict_config_prefix + ".password", config.getString(collection_prefix + ".password", "")); - configuration.quota_key = dict_config.getString(dict_config_prefix + ".quota_key", config.getString(collection_prefix + ".quota_key", "")); - configuration.database = dict_config.getString(dict_config_prefix + ".db", config.getString(dict_config_prefix + ".database", - config.getString(collection_prefix + ".db", config.getString(collection_prefix + ".database", "")))); - configuration.table = dict_config.getString(dict_config_prefix + ".table", config.getString(collection_prefix + ".table", "")); - configuration.schema = dict_config.getString(dict_config_prefix + ".schema", config.getString(collection_prefix + ".schema", "")); - - if (configuration.host.empty() || configuration.port == 0 || configuration.username.empty() || configuration.table.empty()) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Named collection of connection parameters is missing some " - "of the parameters and dictionary parameters are not added"); - } - return ExternalDataSourceInfo{.configuration = configuration, .settings_changes = config_settings}; - } - return std::nullopt; -} - -std::optional getURLBasedDataSourceConfiguration( - const Poco::Util::AbstractConfiguration & dict_config, const String & dict_config_prefix, ContextPtr context) -{ - URLBasedDataSourceConfiguration configuration; - auto collection_name = dict_config.getString(dict_config_prefix + ".name", ""); - if (!collection_name.empty()) - { - const auto & config = context->getConfigRef(); - const auto & collection_prefix = fmt::format("named_collections.{}", collection_name); - - if (!config.has(collection_prefix)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no collection named `{}` in config", collection_name); - - configuration.url = - dict_config.getString(dict_config_prefix + ".url", config.getString(collection_prefix + ".url", "")); - configuration.endpoint = - dict_config.getString(dict_config_prefix + ".endpoint", config.getString(collection_prefix + ".endpoint", "")); - configuration.format = - dict_config.getString(dict_config_prefix + ".format", config.getString(collection_prefix + ".format", "")); - configuration.compression_method = - dict_config.getString(dict_config_prefix + ".compression", config.getString(collection_prefix + ".compression_method", "")); - configuration.structure = - dict_config.getString(dict_config_prefix + ".structure", config.getString(collection_prefix + ".structure", "")); - configuration.user = - dict_config.getString(dict_config_prefix + ".credentials.user", config.getString(collection_prefix + ".credentials.user", "")); - configuration.password = - dict_config.getString(dict_config_prefix + ".credentials.password", config.getString(collection_prefix + ".credentials.password", "")); - - String headers_prefix; - const Poco::Util::AbstractConfiguration *headers_config = nullptr; - if (dict_config.has(dict_config_prefix + ".headers")) - { - headers_prefix = dict_config_prefix + ".headers"; - headers_config = &dict_config; - } - else - { - headers_prefix = collection_prefix + ".headers"; - headers_config = &config; - } - - if (headers_config) - { - Poco::Util::AbstractConfiguration::Keys header_keys; - headers_config->keys(headers_prefix, header_keys); - headers_prefix += "."; - for (const auto & header : header_keys) - { - const auto header_prefix = headers_prefix + header; - configuration.headers.emplace_back( - headers_config->getString(header_prefix + ".name"), - headers_config->getString(header_prefix + ".value")); - } - } - - return URLBasedDataSourceConfig{ .configuration = configuration }; - } - - return std::nullopt; -} - -ExternalDataSourcesByPriority getExternalDataSourceConfigurationByPriority( - const Poco::Util::AbstractConfiguration & dict_config, const String & dict_config_prefix, ContextPtr context, HasConfigKeyFunc has_config_key) -{ - validateConfigKeys(dict_config, dict_config_prefix, has_config_key); - ExternalDataSourceConfiguration common_configuration; - - auto named_collection = getExternalDataSourceConfiguration(dict_config, dict_config_prefix, context, has_config_key); - if (named_collection) - { - common_configuration = named_collection->configuration; - } - else - { - common_configuration.host = dict_config.getString(dict_config_prefix + ".host", ""); - common_configuration.port = dict_config.getUInt(dict_config_prefix + ".port", 0); - common_configuration.username = dict_config.getString(dict_config_prefix + ".user", ""); - common_configuration.password = dict_config.getString(dict_config_prefix + ".password", ""); - common_configuration.quota_key = dict_config.getString(dict_config_prefix + ".quota_key", ""); - common_configuration.database = dict_config.getString(dict_config_prefix + ".db", dict_config.getString(dict_config_prefix + ".database", "")); - common_configuration.table = dict_config.getString(fmt::format("{}.table", dict_config_prefix), ""); - common_configuration.schema = dict_config.getString(fmt::format("{}.schema", dict_config_prefix), ""); - } - - ExternalDataSourcesByPriority configuration - { - .database = common_configuration.database, - .table = common_configuration.table, - .schema = common_configuration.schema, - .replicas_configurations = {} - }; - - if (dict_config.has(dict_config_prefix + ".replica")) - { - Poco::Util::AbstractConfiguration::Keys config_keys; - dict_config.keys(dict_config_prefix, config_keys); - - for (const auto & config_key : config_keys) - { - if (config_key.starts_with("replica")) - { - ExternalDataSourceConfiguration replica_configuration(common_configuration); - String replica_name = dict_config_prefix + "." + config_key; - validateConfigKeys(dict_config, replica_name, has_config_key); - - size_t priority = dict_config.getInt(replica_name + ".priority", 0); - replica_configuration.host = dict_config.getString(replica_name + ".host", common_configuration.host); - replica_configuration.port = dict_config.getUInt(replica_name + ".port", common_configuration.port); - replica_configuration.username = dict_config.getString(replica_name + ".user", common_configuration.username); - replica_configuration.password = dict_config.getString(replica_name + ".password", common_configuration.password); - replica_configuration.quota_key = dict_config.getString(replica_name + ".quota_key", common_configuration.quota_key); - - if (replica_configuration.host.empty() || replica_configuration.port == 0 - || replica_configuration.username.empty() || replica_configuration.password.empty()) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Named collection of connection parameters is missing some " - "of the parameters and no other dictionary parameters are added"); - } - - configuration.replicas_configurations[priority].emplace_back(replica_configuration); - } - } - } - else - { - configuration.replicas_configurations[0].emplace_back(common_configuration); - } - - return configuration; -} - - -void URLBasedDataSourceConfiguration::set(const URLBasedDataSourceConfiguration & conf) -{ - url = conf.url; - format = conf.format; - compression_method = conf.compression_method; - structure = conf.structure; - http_method = conf.http_method; - headers = conf.headers; -} - -template -std::optional getExternalDataSourceConfiguration( - const Poco::Util::AbstractConfiguration & dict_config, const String & dict_config_prefix, - ContextPtr context, HasConfigKeyFunc has_config_key, const BaseSettings & settings); - -template -SettingsChanges getSettingsChangesFromConfig( - const BaseSettings & settings, const Poco::Util::AbstractConfiguration & config, const String & config_prefix); - -} diff --git a/src/Storages/ExternalDataSourceConfiguration.h b/src/Storages/ExternalDataSourceConfiguration.h deleted file mode 100644 index c703c9ce999..00000000000 --- a/src/Storages/ExternalDataSourceConfiguration.h +++ /dev/null @@ -1,92 +0,0 @@ -#pragma once - -#include -#include -#include -#include - - -namespace DB -{ - -#define EMPTY_SETTINGS(M, ALIAS) -DECLARE_SETTINGS_TRAITS(EmptySettingsTraits, EMPTY_SETTINGS) - -struct EmptySettings : public BaseSettings {}; - -struct ExternalDataSourceConfiguration -{ - String host; - UInt16 port = 0; - String username = "default"; - String password; - String quota_key; - String database; - String table; - String schema; - - std::vector> addresses; /// Failover replicas. - String addresses_expr; - - String toString() const; - - void set(const ExternalDataSourceConfiguration & conf); -}; - - -using StorageSpecificArgs = std::vector>; - -struct ExternalDataSourceInfo -{ - ExternalDataSourceConfiguration configuration; - SettingsChanges settings_changes; -}; - -using HasConfigKeyFunc = std::function; - -template -std::optional getExternalDataSourceConfiguration( - const Poco::Util::AbstractConfiguration & dict_config, const String & dict_config_prefix, - ContextPtr context, HasConfigKeyFunc has_config_key, const BaseSettings & settings = {}); - - -/// Highest priority is 0, the bigger the number in map, the less the priority. -using ExternalDataSourcesConfigurationByPriority = std::map>; - -struct ExternalDataSourcesByPriority -{ - String database; - String table; - String schema; - ExternalDataSourcesConfigurationByPriority replicas_configurations; -}; - -ExternalDataSourcesByPriority -getExternalDataSourceConfigurationByPriority(const Poco::Util::AbstractConfiguration & dict_config, const String & dict_config_prefix, ContextPtr context, HasConfigKeyFunc has_config_key); - -struct URLBasedDataSourceConfiguration -{ - String url; - String endpoint; - String format = "auto"; - String compression_method = "auto"; - String structure = "auto"; - - String user; - String password; - - HTTPHeaderEntries headers; - String http_method; - - void set(const URLBasedDataSourceConfiguration & conf); -}; - -struct URLBasedDataSourceConfig -{ - URLBasedDataSourceConfiguration configuration; -}; - -std::optional getURLBasedDataSourceConfiguration( - const Poco::Util::AbstractConfiguration & dict_config, const String & dict_config_prefix, ContextPtr context); - -} diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index 089fb5c585c..ea2e9e3eece 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -444,8 +444,8 @@ StorageHive::StorageHive( storage_metadata.setComment(comment_); storage_metadata.partition_key = KeyDescription::getKeyFromAST(partition_by_ast, storage_metadata.columns, getContext()); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.columns, getContext())); setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); } void StorageHive::lazyInitialize() diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 0477a08b0d2..6de7e60285f 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -103,7 +103,7 @@ public: IStorage(const IStorage &) = delete; IStorage & operator=(const IStorage &) = delete; - /// The main name of the table type (for example, StorageMergeTree). + /// The main name of the table type (e.g. Memory, MergeTree, CollapsingMergeTree). virtual std::string getName() const = 0; /// The name of the table. diff --git a/src/Storages/IndicesDescription.cpp b/src/Storages/IndicesDescription.cpp index cef8fd85f97..753fbf1d635 100644 --- a/src/Storages/IndicesDescription.cpp +++ b/src/Storages/IndicesDescription.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -130,10 +131,15 @@ IndexDescription IndexDescription::getIndexFromAST(const ASTPtr & definition_ast { for (size_t i = 0; i < index_type->arguments->children.size(); ++i) { - const auto * argument = index_type->arguments->children[i]->as(); - if (!argument) + const auto & child = index_type->arguments->children[i]; + if (const auto * ast_literal = child->as(); ast_literal != nullptr) + /// E.g. INDEX index_name column_name TYPE vector_similarity('hnsw', 'f32') + result.arguments.emplace_back(ast_literal->value); + else if (const auto * ast_identifier = child->as(); ast_identifier != nullptr) + /// E.g. INDEX index_name column_name TYPE vector_similarity(hnsw, f32) + result.arguments.emplace_back(ast_identifier->name()); + else throw Exception(ErrorCodes::INCORRECT_QUERY, "Only literals can be skip index arguments"); - result.arguments.emplace_back(argument->value); } } diff --git a/src/Storages/Kafka/KafkaConfigLoader.cpp b/src/Storages/Kafka/KafkaConfigLoader.cpp new file mode 100644 index 00000000000..df6ccec4b7f --- /dev/null +++ b/src/Storages/Kafka/KafkaConfigLoader.cpp @@ -0,0 +1,480 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace CurrentMetrics +{ +extern const Metric KafkaLibrdkafkaThreads; +} + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +template +struct KafkaInterceptors +{ + static rd_kafka_resp_err_t rdKafkaOnThreadStart(rd_kafka_t *, rd_kafka_thread_type_t thread_type, const char *, void * ctx); + + static rd_kafka_resp_err_t rdKafkaOnThreadExit(rd_kafka_t *, rd_kafka_thread_type_t, const char *, void * ctx); + + static rd_kafka_resp_err_t + rdKafkaOnNew(rd_kafka_t * rk, const rd_kafka_conf_t *, void * ctx, char * /*errstr*/, size_t /*errstr_size*/); + + static rd_kafka_resp_err_t rdKafkaOnConfDup( + rd_kafka_conf_t * new_conf, const rd_kafka_conf_t * /*old_conf*/, size_t /*filter_cnt*/, const char ** /*filter*/, void * ctx); +}; + +template +rd_kafka_resp_err_t +KafkaInterceptors::rdKafkaOnThreadStart(rd_kafka_t *, rd_kafka_thread_type_t thread_type, const char *, void * ctx) +{ + TStorageKafka * self = reinterpret_cast(ctx); + CurrentMetrics::add(CurrentMetrics::KafkaLibrdkafkaThreads, 1); + + const auto & storage_id = self->getStorageID(); + const auto & table = storage_id.getTableName(); + + switch (thread_type) + { + case RD_KAFKA_THREAD_MAIN: + setThreadName(("rdk:m/" + table.substr(0, 9)).c_str()); + break; + case RD_KAFKA_THREAD_BACKGROUND: + setThreadName(("rdk:bg/" + table.substr(0, 8)).c_str()); + break; + case RD_KAFKA_THREAD_BROKER: + setThreadName(("rdk:b/" + table.substr(0, 9)).c_str()); + break; + } + + /// Create ThreadStatus to track memory allocations from librdkafka threads. + // + /// And store them in a separate list (thread_statuses) to make sure that they will be destroyed, + /// regardless how librdkafka calls the hooks. + /// But this can trigger use-after-free if librdkafka will not destroy threads after rd_kafka_wait_destroyed() + auto thread_status = std::make_shared(); + std::lock_guard lock(self->thread_statuses_mutex); + self->thread_statuses.emplace_back(std::move(thread_status)); + + return RD_KAFKA_RESP_ERR_NO_ERROR; +} + +template +rd_kafka_resp_err_t KafkaInterceptors::rdKafkaOnThreadExit(rd_kafka_t *, rd_kafka_thread_type_t, const char *, void * ctx) +{ + TStorageKafka * self = reinterpret_cast(ctx); + CurrentMetrics::sub(CurrentMetrics::KafkaLibrdkafkaThreads, 1); + + std::lock_guard lock(self->thread_statuses_mutex); + const auto it = std::find_if( + self->thread_statuses.begin(), + self->thread_statuses.end(), + [](const auto & thread_status_ptr) { return thread_status_ptr.get() == current_thread; }); + if (it == self->thread_statuses.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "No thread status for this librdkafka thread."); + + self->thread_statuses.erase(it); + + return RD_KAFKA_RESP_ERR_NO_ERROR; +} + +template +rd_kafka_resp_err_t KafkaInterceptors::rdKafkaOnNew( + rd_kafka_t * rk, const rd_kafka_conf_t *, void * ctx, char * /*errstr*/, size_t /*errstr_size*/) +{ + TStorageKafka * self = reinterpret_cast(ctx); + rd_kafka_resp_err_t status; + + status = rd_kafka_interceptor_add_on_thread_start(rk, "init-thread", rdKafkaOnThreadStart, ctx); + if (status != RD_KAFKA_RESP_ERR_NO_ERROR) + { + LOG_ERROR(self->log, "Cannot set on thread start interceptor due to {} error", status); + return status; + } + + status = rd_kafka_interceptor_add_on_thread_exit(rk, "exit-thread", rdKafkaOnThreadExit, ctx); + if (status != RD_KAFKA_RESP_ERR_NO_ERROR) + LOG_ERROR(self->log, "Cannot set on thread exit interceptor due to {} error", status); + + return status; +} + +template +rd_kafka_resp_err_t KafkaInterceptors::rdKafkaOnConfDup( + rd_kafka_conf_t * new_conf, const rd_kafka_conf_t * /*old_conf*/, size_t /*filter_cnt*/, const char ** /*filter*/, void * ctx) +{ + TStorageKafka * self = reinterpret_cast(ctx); + rd_kafka_resp_err_t status; + + // cppkafka copies configuration multiple times + status = rd_kafka_conf_interceptor_add_on_conf_dup(new_conf, "init", rdKafkaOnConfDup, ctx); + if (status != RD_KAFKA_RESP_ERR_NO_ERROR) + { + LOG_ERROR(self->log, "Cannot set on conf dup interceptor due to {} error", status); + return status; + } + + status = rd_kafka_conf_interceptor_add_on_new(new_conf, "init", rdKafkaOnNew, ctx); + if (status != RD_KAFKA_RESP_ERR_NO_ERROR) + LOG_ERROR(self->log, "Cannot set on conf new interceptor due to {} error", status); + + return status; +} + +template struct KafkaInterceptors; +template struct KafkaInterceptors; + +namespace +{ + +void setKafkaConfigValue(cppkafka::Configuration & kafka_config, const String & key, const String & value) +{ + /// "log_level" has valid underscore, the remaining librdkafka setting use dot.separated.format which isn't acceptable for XML. + /// See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md + const String setting_name_in_kafka_config = (key == "log_level") ? key : boost::replace_all_copy(key, "_", "."); + kafka_config.set(setting_name_in_kafka_config, value); +} + +void loadConfigProperty( + cppkafka::Configuration & kafka_config, + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix, + const String & tag) +{ + const String property_path = config_prefix + "." + tag; + const String property_value = config.getString(property_path); + + setKafkaConfigValue(kafka_config, tag, property_value); +} + +void loadNamedCollectionConfig(cppkafka::Configuration & kafka_config, const String & collection_name, const String & config_prefix) +{ + const auto & collection = NamedCollectionFactory::instance().get(collection_name); + for (const auto & key : collection->getKeys(-1, config_prefix)) + { + // Cut prefix with '.' before actual config tag. + const auto param_name = key.substr(config_prefix.size() + 1); + setKafkaConfigValue(kafka_config, param_name, collection->get(key)); + } +} + +void loadLegacyTopicConfig( + cppkafka::Configuration & kafka_config, + const Poco::Util::AbstractConfiguration & config, + const String & collection_name, + const String & config_prefix) +{ + if (!collection_name.empty()) + { + loadNamedCollectionConfig(kafka_config, collection_name, config_prefix); + return; + } + + Poco::Util::AbstractConfiguration::Keys tags; + config.keys(config_prefix, tags); + + for (const auto & tag : tags) + { + loadConfigProperty(kafka_config, config, config_prefix, tag); + } +} + +/// Read server configuration into cppkafa configuration, used by new per-topic configuration +void loadTopicConfig( + cppkafka::Configuration & kafka_config, + const Poco::Util::AbstractConfiguration & config, + const String & collection_name, + const String & config_prefix, + const String & topic) +{ + if (!collection_name.empty()) + { + const auto topic_prefix = fmt::format("{}.{}", config_prefix, KafkaConfigLoader::CONFIG_KAFKA_TOPIC_TAG); + const auto & collection = NamedCollectionFactory::instance().get(collection_name); + for (const auto & key : collection->getKeys(1, config_prefix)) + { + /// Only consider key . Multiple occurrences given as "kafka_topic", "kafka_topic[1]", etc. + if (!key.starts_with(topic_prefix)) + continue; + + const String kafka_topic_path = config_prefix + "." + key; + const String kafka_topic_name_path = kafka_topic_path + "." + KafkaConfigLoader::CONFIG_NAME_TAG; + if (topic == collection->get(kafka_topic_name_path)) + /// Found it! Now read the per-topic configuration into cppkafka. + loadNamedCollectionConfig(kafka_config, collection_name, kafka_topic_path); + } + } + else + { + /// Read all tags one level below + Poco::Util::AbstractConfiguration::Keys tags; + config.keys(config_prefix, tags); + + for (const auto & tag : tags) + { + if (tag == KafkaConfigLoader::CONFIG_NAME_TAG) + continue; // ignore , it is used to match topic configurations + loadConfigProperty(kafka_config, config, config_prefix, tag); + } + } +} + +/// Read server configuration into cppkafka configuration, used by global configuration and by legacy per-topic configuration +void loadFromConfig( + cppkafka::Configuration & kafka_config, const KafkaConfigLoader::LoadConfigParams & params, const String & config_prefix) +{ + if (!params.collection_name.empty()) + { + loadNamedCollectionConfig(kafka_config, params.collection_name, config_prefix); + return; + } + + /// Read all tags one level below + Poco::Util::AbstractConfiguration::Keys tags; + params.config.keys(config_prefix, tags); + + for (const auto & tag : tags) + { + if (tag == KafkaConfigLoader::CONFIG_KAFKA_PRODUCER_TAG || tag == KafkaConfigLoader::CONFIG_KAFKA_CONSUMER_TAG) + /// Do not load consumer/producer properties, since they should be separated by different configuration objects. + continue; + + if (tag.starts_with( + KafkaConfigLoader::CONFIG_KAFKA_TOPIC_TAG)) /// multiple occurrences given as "kafka_topic", "kafka_topic[1]", etc. + { + // Update consumer topic-specific configuration (new syntax). Example with topics "football" and "baseball": + // + // + // football + // 250 + // 5000 + // + // + // baseball + // 300 + // 2000 + // + // + // Advantages: The period restriction no longer applies (e.g. sports.football will work), everything + // Kafka-related is below . + for (const auto & topic : params.topics) + { + /// Read topic name between ... + const String kafka_topic_path = config_prefix + "." + tag; + const String kafka_topic_name_path = kafka_topic_path + "." + KafkaConfigLoader::CONFIG_NAME_TAG; + const String topic_name = params.config.getString(kafka_topic_name_path); + + if (topic_name != topic) + continue; + loadTopicConfig(kafka_config, params.config, params.collection_name, kafka_topic_path, topic); + } + continue; + } + if (tag.starts_with(KafkaConfigLoader::CONFIG_KAFKA_TAG)) + /// skip legacy configuration per topic e.g. . + /// it will be processed is a separate function + continue; + // Update configuration from the configuration. Example: + // + // 250 + // 100000 + // + loadConfigProperty(kafka_config, params.config, config_prefix, tag); + } +} + +void loadLegacyConfigSyntax( + cppkafka::Configuration & kafka_config, + const Poco::Util::AbstractConfiguration & config, + const String & collection_name, + const Names & topics) +{ + for (const auto & topic : topics) + { + const String kafka_topic_path = KafkaConfigLoader::CONFIG_KAFKA_TAG + "." + KafkaConfigLoader::CONFIG_KAFKA_TAG + "_" + topic; + loadLegacyTopicConfig(kafka_config, config, collection_name, kafka_topic_path); + } +} + +void loadConsumerConfig(cppkafka::Configuration & kafka_config, const KafkaConfigLoader::LoadConfigParams & params) +{ + const String consumer_path = KafkaConfigLoader::CONFIG_KAFKA_TAG + "." + KafkaConfigLoader::CONFIG_KAFKA_CONSUMER_TAG; + loadLegacyConfigSyntax(kafka_config, params.config, params.collection_name, params.topics); + // A new syntax has higher priority + loadFromConfig(kafka_config, params, consumer_path); +} + +void loadProducerConfig(cppkafka::Configuration & kafka_config, const KafkaConfigLoader::LoadConfigParams & params) +{ + const String producer_path = KafkaConfigLoader::CONFIG_KAFKA_TAG + "." + KafkaConfigLoader::CONFIG_KAFKA_PRODUCER_TAG; + loadLegacyConfigSyntax(kafka_config, params.config, params.collection_name, params.topics); + // A new syntax has higher priority + loadFromConfig(kafka_config, params, producer_path); +} + +template +void updateGlobalConfiguration( + cppkafka::Configuration & kafka_config, TKafkaStorage & storage, const KafkaConfigLoader::LoadConfigParams & params) +{ + loadFromConfig(kafka_config, params, KafkaConfigLoader::CONFIG_KAFKA_TAG); + +#if USE_KRB5 + if (kafka_config.has_property("sasl.kerberos.kinit.cmd")) + LOG_WARNING(params.log, "sasl.kerberos.kinit.cmd configuration parameter is ignored."); + + kafka_config.set("sasl.kerberos.kinit.cmd", ""); + kafka_config.set("sasl.kerberos.min.time.before.relogin", "0"); + + if (kafka_config.has_property("sasl.kerberos.keytab") && kafka_config.has_property("sasl.kerberos.principal")) + { + String keytab = kafka_config.get("sasl.kerberos.keytab"); + String principal = kafka_config.get("sasl.kerberos.principal"); + LOG_DEBUG(params.log, "Running KerberosInit"); + try + { + kerberosInit(keytab, principal); + } + catch (const Exception & e) + { + LOG_ERROR(params.log, "KerberosInit failure: {}", getExceptionMessage(e, false)); + } + LOG_DEBUG(params.log, "Finished KerberosInit"); + } +#else // USE_KRB5 + if (kafka_config.has_property("sasl.kerberos.keytab") || kafka_config.has_property("sasl.kerberos.principal")) + LOG_WARNING(params.log, "Ignoring Kerberos-related parameters because ClickHouse was built without krb5 library support."); +#endif // USE_KRB5 + // No need to add any prefix, messages can be distinguished + kafka_config.set_log_callback( + [log = params.log](cppkafka::KafkaHandleBase & handle, int level, const std::string & facility, const std::string & message) + { + auto [poco_level, client_logs_level] = parseSyslogLevel(level); + const auto & kafka_object_config = handle.get_configuration(); + const std::string client_id_key{"client.id"}; + chassert(kafka_object_config.has_property(client_id_key) && "Kafka configuration doesn't have expected client.id set"); + LOG_IMPL( + log, + client_logs_level, + poco_level, + "[client.id:{}] [rdk:{}] {}", + kafka_object_config.get(client_id_key), + facility, + message); + }); + + /// NOTE: statistics should be consumed, otherwise it creates too much + /// entries in the queue, that leads to memory leak and slow shutdown. + if (!kafka_config.has_property("statistics.interval.ms")) + { + // every 3 seconds by default. set to 0 to disable. + kafka_config.set("statistics.interval.ms", "3000"); + } + // Configure interceptor to change thread name + // + // TODO: add interceptors support into the cppkafka. + // XXX: rdkafka uses pthread_set_name_np(), but glibc-compatibility overrides it to noop. + { + // This should be safe, since we wait the rdkafka object anyway. + void * self = static_cast(&storage); + + int status; + + status + = rd_kafka_conf_interceptor_add_on_new(kafka_config.get_handle(), "init", KafkaInterceptors::rdKafkaOnNew, self); + if (status != RD_KAFKA_RESP_ERR_NO_ERROR) + LOG_ERROR(params.log, "Cannot set new interceptor due to {} error", status); + + // cppkafka always copy the configuration + status = rd_kafka_conf_interceptor_add_on_conf_dup( + kafka_config.get_handle(), "init", KafkaInterceptors::rdKafkaOnConfDup, self); + if (status != RD_KAFKA_RESP_ERR_NO_ERROR) + LOG_ERROR(params.log, "Cannot set dup conf interceptor due to {} error", status); + } +} + +} + +template +cppkafka::Configuration KafkaConfigLoader::getConsumerConfiguration(TKafkaStorage & storage, const ConsumerConfigParams & params) +{ + cppkafka::Configuration conf; + + conf.set("metadata.broker.list", params.brokers); + conf.set("group.id", params.group); + if (params.multiple_consumers) + conf.set("client.id", fmt::format("{}-{}", params.client_id, params.consumer_number)); + else + conf.set("client.id", params.client_id); + conf.set("client.software.name", VERSION_NAME); + conf.set("client.software.version", VERSION_DESCRIBE); + conf.set("auto.offset.reset", "earliest"); // If no offset stored for this group, read all messages from the start + + // that allows to prevent fast draining of the librdkafka queue + // during building of single insert block. Improves performance + // significantly, but may lead to bigger memory consumption. + size_t default_queued_min_messages = 100000; // must be greater than or equal to default + size_t max_allowed_queued_min_messages = 10000000; // must be less than or equal to max allowed value + conf.set( + "queued.min.messages", std::min(std::max(params.max_block_size, default_queued_min_messages), max_allowed_queued_min_messages)); + + updateGlobalConfiguration(conf, storage, params); + loadConsumerConfig(conf, params); + + // those settings should not be changed by users. + conf.set("enable.auto.commit", "false"); // We manually commit offsets after a stream successfully finished + conf.set("enable.auto.offset.store", "false"); // Update offset automatically - to commit them all at once. + conf.set("enable.partition.eof", "false"); // Ignore EOF messages + + for (auto & property : conf.get_all()) + { + LOG_TRACE(params.log, "Consumer set property {}:{}", property.first, property.second); + } + + return conf; +} + +template cppkafka::Configuration +KafkaConfigLoader::getConsumerConfiguration(StorageKafka & storage, const ConsumerConfigParams & params); +template cppkafka::Configuration +KafkaConfigLoader::getConsumerConfiguration(StorageKafka2 & storage, const ConsumerConfigParams & params); + +template +cppkafka::Configuration KafkaConfigLoader::getProducerConfiguration(TKafkaStorage & storage, const ProducerConfigParams & params) +{ + cppkafka::Configuration conf; + conf.set("metadata.broker.list", params.brokers); + conf.set("client.id", params.client_id); + conf.set("client.software.name", VERSION_NAME); + conf.set("client.software.version", VERSION_DESCRIBE); + + updateGlobalConfiguration(conf, storage, params); + loadProducerConfig(conf, params); + + for (auto & property : conf.get_all()) + { + LOG_TRACE(params.log, "Producer set property {}:{}", property.first, property.second); + } + + return conf; +} + +template cppkafka::Configuration +KafkaConfigLoader::getProducerConfiguration(StorageKafka & storage, const ProducerConfigParams & params); +template cppkafka::Configuration +KafkaConfigLoader::getProducerConfiguration(StorageKafka2 & storage, const ProducerConfigParams & params); + +} diff --git a/src/Storages/Kafka/KafkaConfigLoader.h b/src/Storages/Kafka/KafkaConfigLoader.h new file mode 100644 index 00000000000..f18683c17f0 --- /dev/null +++ b/src/Storages/Kafka/KafkaConfigLoader.h @@ -0,0 +1,54 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace DB +{ +struct KafkaSettings; +class VirtualColumnsDescription; + +struct KafkaConfigLoader +{ + static inline const String CONFIG_KAFKA_TAG = "kafka"; + static inline const String CONFIG_KAFKA_TOPIC_TAG = "kafka_topic"; + static inline const String CONFIG_NAME_TAG = "name"; + static inline const String CONFIG_KAFKA_CONSUMER_TAG = "consumer"; + static inline const String CONFIG_KAFKA_PRODUCER_TAG = "producer"; + using LogCallback = cppkafka::Configuration::LogCallback; + + + struct LoadConfigParams + { + const Poco::Util::AbstractConfiguration & config; + String & collection_name; + const Names & topics; + LoggerPtr & log; + }; + + struct ConsumerConfigParams : public LoadConfigParams + { + String brokers; + String group; + bool multiple_consumers; + size_t consumer_number; + String client_id; + size_t max_block_size; + }; + + struct ProducerConfigParams : public LoadConfigParams + { + String brokers; + String client_id; + }; + + template + static cppkafka::Configuration getConsumerConfiguration(TKafkaStorage & storage, const ConsumerConfigParams & params); + + template + static cppkafka::Configuration getProducerConfiguration(TKafkaStorage & storage, const ProducerConfigParams & params); +}; +} diff --git a/src/Storages/Kafka/KafkaConsumer.cpp b/src/Storages/Kafka/KafkaConsumer.cpp index 9ba42b9875e..d9256cf39ce 100644 --- a/src/Storages/Kafka/KafkaConsumer.cpp +++ b/src/Storages/Kafka/KafkaConsumer.cpp @@ -9,6 +9,7 @@ #include #include +#include #include #include @@ -20,13 +21,12 @@ namespace CurrentMetrics namespace ProfileEvents { - extern const Event KafkaRebalanceRevocations; - extern const Event KafkaRebalanceAssignments; - extern const Event KafkaRebalanceErrors; - extern const Event KafkaMessagesPolled; - extern const Event KafkaCommitFailures; - extern const Event KafkaCommits; - extern const Event KafkaConsumerErrors; +extern const Event KafkaRebalanceRevocations; +extern const Event KafkaRebalanceAssignments; +extern const Event KafkaRebalanceErrors; +extern const Event KafkaMessagesPolled; +extern const Event KafkaCommitFailures; +extern const Event KafkaCommits; } namespace DB @@ -199,44 +199,9 @@ KafkaConsumer::~KafkaConsumer() // https://github.com/confluentinc/confluent-kafka-go/issues/189 etc. void KafkaConsumer::drain() { - auto start_time = std::chrono::steady_clock::now(); - cppkafka::Error last_error(RD_KAFKA_RESP_ERR_NO_ERROR); - - while (true) - { - auto msg = consumer->poll(100ms); - if (!msg) - break; - - auto error = msg.get_error(); - - if (error) - { - if (msg.is_eof() || error == last_error) - { - break; - } - else - { - LOG_ERROR(log, "Error during draining: {}", error); - setExceptionInfo(error); - } - } - - // i don't stop draining on first error, - // only if it repeats once again sequentially - last_error = error; - - auto ts = std::chrono::steady_clock::now(); - if (std::chrono::duration_cast(ts-start_time) > DRAIN_TIMEOUT_MS) - { - LOG_ERROR(log, "Timeout during draining."); - break; - } - } + StorageKafkaUtils::drainConsumer(*consumer, DRAIN_TIMEOUT_MS, log, [this](const cppkafka::Error & err) { setExceptionInfo(err); }); } - void KafkaConsumer::commit() { auto print_offsets = [this] (const char * prefix, const cppkafka::TopicPartitionList & offsets) @@ -409,7 +374,7 @@ void KafkaConsumer::resetToLastCommitted(const char * msg) { if (!assignment.has_value() || assignment->empty()) { - LOG_TRACE(log, "Not assignned. Can't reset to last committed position."); + LOG_TRACE(log, "Not assigned. Can't reset to last committed position."); return; } auto committed_offset = consumer->get_offsets_committed(consumer->get_assignment()); @@ -473,7 +438,7 @@ ReadBufferPtr KafkaConsumer::consume() // If we're doing a manual select then it's better to get something after a wait, then immediate nothing. if (!assignment.has_value()) { - waited_for_assignment += poll_timeout; // slightly innaccurate, but rough calculation is ok. + waited_for_assignment += poll_timeout; // slightly inaccurate, but rough calculation is ok. if (waited_for_assignment < MAX_TIME_TO_WAIT_FOR_ASSIGNMENT_MS) { continue; @@ -535,26 +500,12 @@ ReadBufferPtr KafkaConsumer::getNextMessage() return getNextMessage(); } -size_t KafkaConsumer::filterMessageErrors() +void KafkaConsumer::filterMessageErrors() { assert(current == messages.begin()); - size_t skipped = std::erase_if(messages, [this](auto & message) - { - if (auto error = message.get_error()) - { - ProfileEvents::increment(ProfileEvents::KafkaConsumerErrors); - LOG_ERROR(log, "Consumer error: {}", error); - setExceptionInfo(error); - return true; - } - return false; - }); - - if (skipped) - LOG_ERROR(log, "There were {} messages with an error", skipped); - - return skipped; + StorageKafkaUtils::eraseMessageErrors(messages, log, [this](const cppkafka::Error & err) { setExceptionInfo(err); }); + current = messages.begin(); } void KafkaConsumer::resetIfStopped() diff --git a/src/Storages/Kafka/KafkaConsumer.h b/src/Storages/Kafka/KafkaConsumer.h index 4daf8652c3b..285f3680213 100644 --- a/src/Storages/Kafka/KafkaConsumer.h +++ b/src/Storages/Kafka/KafkaConsumer.h @@ -1,14 +1,12 @@ #pragma once #include -#include #include #include #include #include -#include #include namespace CurrentMetrics @@ -193,12 +191,8 @@ private: void drain(); void cleanUnprocessed(); void resetIfStopped(); - /// Return number of messages with an error. - size_t filterMessageErrors(); + void filterMessageErrors(); ReadBufferPtr getNextMessage(); }; } - -template <> struct fmt::formatter : fmt::ostream_formatter {}; -template <> struct fmt::formatter : fmt::ostream_formatter {}; diff --git a/src/Storages/Kafka/KafkaConsumer2.cpp b/src/Storages/Kafka/KafkaConsumer2.cpp new file mode 100644 index 00000000000..60626dfa402 --- /dev/null +++ b/src/Storages/Kafka/KafkaConsumer2.cpp @@ -0,0 +1,384 @@ +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + + +namespace CurrentMetrics +{ +extern const Metric KafkaAssignedPartitions; +extern const Metric KafkaConsumersWithAssignment; +} + +namespace ProfileEvents +{ +extern const Event KafkaRebalanceRevocations; +extern const Event KafkaRebalanceAssignments; +extern const Event KafkaRebalanceErrors; +extern const Event KafkaMessagesPolled; +extern const Event KafkaCommitFailures; +extern const Event KafkaCommits; +} + +namespace DB +{ + +using namespace std::chrono_literals; +static constexpr auto EVENT_POLL_TIMEOUT = 50ms; +static constexpr auto DRAIN_TIMEOUT_MS = 5000ms; + + +bool KafkaConsumer2::TopicPartition::operator<(const TopicPartition & other) const +{ + return std::tie(topic, partition_id, offset) < std::tie(other.topic, other.partition_id, other.offset); +} + + +KafkaConsumer2::KafkaConsumer2( + ConsumerPtr consumer_, + LoggerPtr log_, + size_t max_batch_size, + size_t poll_timeout_, + const std::atomic & stopped_, + const Names & topics_) + : consumer(consumer_) + , log(log_) + , batch_size(max_batch_size) + , poll_timeout(poll_timeout_) + , stopped(stopped_) + , current(messages.begin()) + , topics(topics_) +{ + // called (synchronously, during poll) when we enter the consumer group + consumer->set_assignment_callback( + [this](const cppkafka::TopicPartitionList & topic_partitions) + { + CurrentMetrics::add(CurrentMetrics::KafkaAssignedPartitions, topic_partitions.size()); + ProfileEvents::increment(ProfileEvents::KafkaRebalanceAssignments); + + if (topic_partitions.empty()) + { + LOG_INFO(log, "Got empty assignment: Not enough partitions in the topic for all consumers?"); + } + else + { + LOG_TRACE(log, "Topics/partitions assigned: {}", topic_partitions); + CurrentMetrics::add(CurrentMetrics::KafkaConsumersWithAssignment, 1); + } + + chassert(!assignment.has_value()); + + assignment.emplace(); + assignment->reserve(topic_partitions.size()); + needs_offset_update = true; + for (const auto & topic_partition : topic_partitions) + { + assignment->push_back( + TopicPartition{topic_partition.get_topic(), topic_partition.get_partition(), topic_partition.get_offset()}); + } + + // We need to initialize the queues here in order to detach them from the consumer queue. Otherwise `pollEvents` might eventually poll actual messages also. + initializeQueues(topic_partitions); + }); + + // called (synchronously, during poll) when we leave the consumer group + consumer->set_revocation_callback( + [this](const cppkafka::TopicPartitionList & topic_partitions) + { + CurrentMetrics::sub(CurrentMetrics::KafkaAssignedPartitions, topic_partitions.size()); + ProfileEvents::increment(ProfileEvents::KafkaRebalanceRevocations); + + // Rebalance is happening now, and now we have a chance to finish the work + // with topics/partitions we were working with before rebalance + LOG_TRACE(log, "Rebalance initiated. Revoking partitions: {}", topic_partitions); + + if (!topic_partitions.empty()) + { + CurrentMetrics::sub(CurrentMetrics::KafkaConsumersWithAssignment, 1); + } + + assignment.reset(); + queues.clear(); + needs_offset_update = true; + }); + + consumer->set_rebalance_error_callback( + [this](cppkafka::Error err) + { + LOG_ERROR(log, "Rebalance error: {}", err); + ProfileEvents::increment(ProfileEvents::KafkaRebalanceErrors); + }); +} + +KafkaConsumer2::~KafkaConsumer2() +{ + try + { + if (!consumer->get_subscription().empty()) + { + try + { + consumer->unsubscribe(); + } + catch (const cppkafka::HandleException & e) + { + LOG_ERROR(log, "Error during unsubscribe: {}", e.what()); + } + drainConsumerQueue(); + } + } + catch (const cppkafka::HandleException & e) + { + LOG_ERROR(log, "Error while destructing consumer: {}", e.what()); + } +} + +// Needed to drain rest of the messages / queued callback calls from the consumer after unsubscribe, otherwise consumer +// will hang on destruction. Partition queues doesn't have to be attached as events are not handled by those queues. +// see https://github.com/edenhill/librdkafka/issues/2077 +// https://github.com/confluentinc/confluent-kafka-go/issues/189 etc. +void KafkaConsumer2::drainConsumerQueue() +{ + StorageKafkaUtils::drainConsumer(*consumer, DRAIN_TIMEOUT_MS, log); +} + +void KafkaConsumer2::pollEvents() +{ + static constexpr auto max_tries = 5; + for (auto i = 0; i < max_tries; ++i) + { + auto msg = consumer->poll(EVENT_POLL_TIMEOUT); + if (!msg) + return; + // All the partition queues are detached, so the consumer shouldn't be able to poll any real messages + const auto err = msg.get_error(); + chassert(RD_KAFKA_RESP_ERR_NO_ERROR != err.get_error() && "Consumer returned a message when it was not expected"); + LOG_ERROR(log, "Consumer received error while polling events, code {}, error '{}'", err.get_error(), err.to_string()); + } +}; + +bool KafkaConsumer2::polledDataUnusable(const TopicPartition & topic_partition) const +{ + const auto different_topic_partition = current == messages.end() + ? false + : (current->get_topic() != topic_partition.topic || current->get_partition() != topic_partition.partition_id); + return different_topic_partition; +} + +KafkaConsumer2::TopicPartitions const * KafkaConsumer2::getKafkaAssignment() const +{ + if (assignment.has_value()) + { + return &*assignment; + } + + return nullptr; +} + +void KafkaConsumer2::updateOffsets(const TopicPartitions & topic_partitions) +{ + cppkafka::TopicPartitionList original_topic_partitions; + original_topic_partitions.reserve(topic_partitions.size()); + std::transform( + topic_partitions.begin(), + topic_partitions.end(), + std::back_inserter(original_topic_partitions), + [](const TopicPartition & tp) + { + return cppkafka::TopicPartition{tp.topic, tp.partition_id, tp.offset}; + }); + initializeQueues(original_topic_partitions); + needs_offset_update = false; + stalled_status = StalledStatus::NOT_STALLED; +} + +void KafkaConsumer2::initializeQueues(const cppkafka::TopicPartitionList & topic_partitions) +{ + queues.clear(); + messages.clear(); + current = messages.end(); + // cppkafka itself calls assign(), but in order to detach the queues here we have to do the assignment manually. Later on we have to reassign the topic partitions with correct offsets. + consumer->assign(topic_partitions); + for (const auto & topic_partition : topic_partitions) + // This will also detach the partition queues from the consumer, thus the messages won't be forwarded without attaching them manually + queues.emplace( + TopicPartition{topic_partition.get_topic(), topic_partition.get_partition(), topic_partition.get_offset()}, + consumer->get_partition_queue(topic_partition)); +} + +// it do the poll when needed +ReadBufferPtr KafkaConsumer2::consume(const TopicPartition & topic_partition, const std::optional & message_count) +{ + resetIfStopped(); + + if (polledDataUnusable(topic_partition)) + return nullptr; + + if (hasMorePolledMessages()) + { + if (auto next_message = getNextMessage(); next_message) + return next_message; + } + + while (true) + { + stalled_status = StalledStatus::NO_MESSAGES_RETURNED; + + auto & queue_to_poll_from = queues.at(topic_partition); + LOG_TRACE(log, "Batch size {}, offset {}", batch_size, topic_partition.offset); + const auto messages_to_pull = message_count.value_or(batch_size); + /// Don't drop old messages immediately, since we may need them for virtual columns. + auto new_messages = queue_to_poll_from.consume_batch(messages_to_pull, std::chrono::milliseconds(poll_timeout)); + + resetIfStopped(); + if (stalled_status == StalledStatus::CONSUMER_STOPPED) + { + return nullptr; + } + + if (new_messages.empty()) + { + LOG_TRACE(log, "Stalled"); + return nullptr; + } + else + { + messages = std::move(new_messages); + current = messages.begin(); + LOG_TRACE( + log, + "Polled batch of {} messages. Offsets position: {}", + messages.size(), + consumer->get_offsets_position(consumer->get_assignment())); + break; + } + } + + filterMessageErrors(); + if (current == messages.end()) + { + LOG_ERROR(log, "Only errors left"); + stalled_status = StalledStatus::ERRORS_RETURNED; + return nullptr; + } + + ProfileEvents::increment(ProfileEvents::KafkaMessagesPolled, messages.size()); + + stalled_status = StalledStatus::NOT_STALLED; + return getNextMessage(); +} + +void KafkaConsumer2::commit(const TopicPartition & topic_partition) +{ + static constexpr auto max_retries = 5; + bool committed = false; + + LOG_TEST( + log, + "Trying to commit offset {} to Kafka for topic-partition [{}:{}]", + topic_partition.offset, + topic_partition.topic, + topic_partition.partition_id); + + const auto topic_partition_list = std::vector{cppkafka::TopicPartition{ + topic_partition.topic, + topic_partition.partition_id, + topic_partition.offset, + }}; + for (auto try_count = 0; try_count < max_retries && !committed; ++try_count) + { + try + { + // See https://github.com/edenhill/librdkafka/issues/1470 + // broker may reject commit if during offsets.commit.timeout.ms (5000 by default), + // there were not enough replicas available for the __consumer_offsets topic. + // also some other temporary issues like client-server connectivity problems are possible + + consumer->commit(topic_partition_list); + committed = true; + LOG_INFO( + log, + "Committed offset {} to Kafka for topic-partition [{}:{}]", + topic_partition.offset, + topic_partition.topic, + topic_partition.partition_id); + } + catch (const cppkafka::HandleException & e) + { + // If there were actually no offsets to commit, return. Retrying won't solve + // anything here + if (e.get_error() == RD_KAFKA_RESP_ERR__NO_OFFSET) + committed = true; + else + LOG_ERROR(log, "Exception during attempt to commit to Kafka: {}", e.what()); + } + } + + if (!committed) + { + // The failure is not the biggest issue, it only counts when a table is dropped and recreated, otherwise the offsets are taken from keeper. + ProfileEvents::increment(ProfileEvents::KafkaCommitFailures); + LOG_ERROR(log, "All commit attempts failed"); + } + else + { + ProfileEvents::increment(ProfileEvents::KafkaCommits); + } +} + +void KafkaConsumer2::subscribeIfNotSubscribedYet() +{ + if (likely(is_subscribed)) + return; + + consumer->subscribe(topics); + is_subscribed = true; + LOG_DEBUG(log, "Subscribed."); +} + +ReadBufferPtr KafkaConsumer2::getNextMessage() +{ + while (current != messages.end()) + { + const auto * data = current->get_payload().get_data(); + size_t size = current->get_payload().get_size(); + ++current; + + // `data` can be nullptr on case of the Kafka message has empty payload + if (data) + return std::make_shared(data, size); + } + + return nullptr; +} + +void KafkaConsumer2::filterMessageErrors() +{ + assert(current == messages.begin()); + + StorageKafkaUtils::eraseMessageErrors(messages, log); + current = messages.begin(); +} + +void KafkaConsumer2::resetIfStopped() +{ + if (stopped) + { + stalled_status = StalledStatus::CONSUMER_STOPPED; + } +} +} diff --git a/src/Storages/Kafka/KafkaConsumer2.h b/src/Storages/Kafka/KafkaConsumer2.h new file mode 100644 index 00000000000..dd2cfe87aa0 --- /dev/null +++ b/src/Storages/Kafka/KafkaConsumer2.h @@ -0,0 +1,162 @@ +#pragma once + +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace CurrentMetrics +{ +extern const Metric KafkaConsumers; +} + +namespace Poco +{ +class Logger; +} + +namespace DB +{ + +using ConsumerPtr = std::shared_ptr; + +class KafkaConsumer2 +{ +public: + static inline constexpr int INVALID_OFFSET = RD_KAFKA_OFFSET_INVALID; + static inline constexpr int BEGINNING_OFFSET = RD_KAFKA_OFFSET_BEGINNING; + static inline constexpr int END_OFFSET = RD_KAFKA_OFFSET_END; + + struct TopicPartition + { + String topic; + int32_t partition_id; + int64_t offset{INVALID_OFFSET}; + + bool operator==(const TopicPartition &) const = default; + bool operator<(const TopicPartition & other) const; + }; + + using TopicPartitions = std::vector; + + struct OnlyTopicNameAndPartitionIdHash + { + std::size_t operator()(const TopicPartition & tp) const + { + SipHash s; + s.update(tp.topic); + s.update(tp.partition_id); + return s.get64(); + } + }; + + struct OnlyTopicNameAndPartitionIdEquality + { + bool operator()(const TopicPartition & lhs, const TopicPartition & rhs) const + { + return lhs.topic == rhs.topic && lhs.partition_id == rhs.partition_id; + } + }; + + struct TopicPartitionCount + { + String topic; + size_t partition_count; + }; + + using TopicPartitionCounts = std::vector; + + KafkaConsumer2( + ConsumerPtr consumer_, + LoggerPtr log_, + size_t max_batch_size, + size_t poll_timeout_, + const std::atomic & stopped_, + const Names & topics_); + + ~KafkaConsumer2(); + + // Poll only the main consumer queue without any topic-partition queues. This is useful to get notified about events, such as rebalance, + // new assignment, etc.. + void pollEvents(); + + auto pollTimeout() const { return poll_timeout; } + + inline bool hasMorePolledMessages() const { return (stalled_status == StalledStatus::NOT_STALLED) && (current != messages.end()); } + + inline bool isStalled() const { return stalled_status != StalledStatus::NOT_STALLED; } + + // Returns the topic partitions that the consumer got from rebalancing the consumer group. If the consumer received + // no topic partitions or all of them were revoked, it returns a null pointer. + TopicPartitions const * getKafkaAssignment() const; + + // As the main source of offsets is not Kafka, the offsets needs to be pushed to the consumer from outside + // Returns true if it received new assignment and internal state should be updated by updateOffsets + bool needsOffsetUpdate() const { return needs_offset_update; } + void updateOffsets(const TopicPartitions & topic_partitions); + + /// Polls batch of messages from the given topic-partition and returns read buffer containing the next message or + /// nullptr when there are no messages to process. + ReadBufferPtr consume(const TopicPartition & topic_partition, const std::optional & message_count); + + void commit(const TopicPartition & topic_partition); + + // Return values for the message that's being read. + String currentTopic() const { return current[-1].get_topic(); } + String currentKey() const { return current[-1].get_key(); } + auto currentOffset() const { return current[-1].get_offset(); } + auto currentPartition() const { return current[-1].get_partition(); } + auto currentTimestamp() const { return current[-1].get_timestamp(); } + const auto & currentHeaderList() const { return current[-1].get_header_list(); } + String currentPayload() const { return current[-1].get_payload(); } + + void subscribeIfNotSubscribedYet(); + +private: + using Messages = std::vector; + CurrentMetrics::Increment metric_increment{CurrentMetrics::KafkaConsumers}; + + enum class StalledStatus + { + NOT_STALLED, + NO_MESSAGES_RETURNED, + CONSUMER_STOPPED, + NO_ASSIGNMENT, + ERRORS_RETURNED + }; + + ConsumerPtr consumer; + LoggerPtr log; + const size_t batch_size = 1; + const size_t poll_timeout = 0; + + StalledStatus stalled_status = StalledStatus::NO_MESSAGES_RETURNED; + + const std::atomic & stopped; + bool is_subscribed = false; + + // order is important, need to be destructed before consumer + Messages messages; + Messages::const_iterator current; + + // order is important, need to be destructed before consumer + std::optional assignment; + bool needs_offset_update{false}; + std::unordered_map queues; + const Names topics; + + bool polledDataUnusable(const TopicPartition & topic_partition) const; + void drainConsumerQueue(); + void resetIfStopped(); + void filterMessageErrors(); + ReadBufferPtr getNextMessage(); + + void initializeQueues(const cppkafka::TopicPartitionList & topic_partitions); +}; + +} diff --git a/src/Storages/Kafka/KafkaSettings.h b/src/Storages/Kafka/KafkaSettings.h index 0addaf9e3b3..9ca5e189f0e 100644 --- a/src/Storages/Kafka/KafkaSettings.h +++ b/src/Storages/Kafka/KafkaSettings.h @@ -38,6 +38,8 @@ const auto KAFKA_CONSUMERS_POOL_TTL_MS_MAX = 600'000; M(StreamingHandleErrorMode, kafka_handle_error_mode, StreamingHandleErrorMode::DEFAULT, "How to handle errors for Kafka engine. Possible values: default (throw an exception after rabbitmq_skip_broken_messages broken messages), stream (save broken messages and errors in virtual columns _raw_message, _error).", 0) \ M(Bool, kafka_commit_on_select, false, "Commit messages when select query is made", 0) \ M(UInt64, kafka_max_rows_per_message, 1, "The maximum number of rows produced in one kafka message for row-based formats.", 0) \ + M(String, kafka_keeper_path, "", "The path to the table in ClickHouse Keeper", 0) \ + M(String, kafka_replica_name, "", "The replica name in ClickHouse Keeper", 0) \ #define OBSOLETE_KAFKA_SETTINGS(M, ALIAS) \ MAKE_OBSOLETE(M, Char, kafka_row_delimiter, '\0') \ diff --git a/src/Storages/Kafka/KafkaSource.cpp b/src/Storages/Kafka/KafkaSource.cpp index 9c68107872e..3ddd0d1be8c 100644 --- a/src/Storages/Kafka/KafkaSource.cpp +++ b/src/Storages/Kafka/KafkaSource.cpp @@ -262,7 +262,7 @@ Chunk KafkaSource::generateImpl() // they are not needed here: // and it's misleading to use them here, // as columns 'materialized' that way stays 'ephemeral' - // i.e. will not be stored anythere + // i.e. will not be stored anywhere // If needed any extra columns can be added using DEFAULT they can be added at MV level if needed. auto result_block = non_virtual_header.cloneWithColumns(executor.getResultColumns()); diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 3aad64a0cfb..f4f641d1c68 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -1,13 +1,5 @@ #include -#include -#include -#include -#include -#include -#include -#include -#include #include #include #include @@ -21,18 +13,19 @@ #include #include #include +#include +#include #include #include #include #include #include #include +#include #include #include -#include #include #include -#include #include #include #include @@ -41,10 +34,10 @@ #include #include #include +#include #include #include #include -#include #include #include #include @@ -55,13 +48,8 @@ #include #include -#if USE_KRB5 -#include -#endif // USE_KRB5 - namespace CurrentMetrics { - extern const Metric KafkaLibrdkafkaThreads; extern const Metric KafkaBackgroundReads; extern const Metric KafkaConsumersInUse; extern const Metric KafkaWrites; @@ -82,104 +70,10 @@ namespace ErrorCodes { extern const int NOT_IMPLEMENTED; extern const int LOGICAL_ERROR; - extern const int BAD_ARGUMENTS; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int QUERY_NOT_ALLOWED; extern const int ABORTED; } -struct StorageKafkaInterceptors -{ - static rd_kafka_resp_err_t rdKafkaOnThreadStart(rd_kafka_t *, rd_kafka_thread_type_t thread_type, const char *, void * ctx) - { - StorageKafka * self = reinterpret_cast(ctx); - CurrentMetrics::add(CurrentMetrics::KafkaLibrdkafkaThreads, 1); - - const auto & storage_id = self->getStorageID(); - const auto & table = storage_id.getTableName(); - - switch (thread_type) - { - case RD_KAFKA_THREAD_MAIN: - setThreadName(("rdk:m/" + table.substr(0, 9)).c_str()); - break; - case RD_KAFKA_THREAD_BACKGROUND: - setThreadName(("rdk:bg/" + table.substr(0, 8)).c_str()); - break; - case RD_KAFKA_THREAD_BROKER: - setThreadName(("rdk:b/" + table.substr(0, 9)).c_str()); - break; - } - - /// Create ThreadStatus to track memory allocations from librdkafka threads. - // - /// And store them in a separate list (thread_statuses) to make sure that they will be destroyed, - /// regardless how librdkafka calls the hooks. - /// But this can trigger use-after-free if librdkafka will not destroy threads after rd_kafka_wait_destroyed() - auto thread_status = std::make_shared(); - std::lock_guard lock(self->thread_statuses_mutex); - self->thread_statuses.emplace_back(std::move(thread_status)); - - return RD_KAFKA_RESP_ERR_NO_ERROR; - } - static rd_kafka_resp_err_t rdKafkaOnThreadExit(rd_kafka_t *, rd_kafka_thread_type_t, const char *, void * ctx) - { - StorageKafka * self = reinterpret_cast(ctx); - CurrentMetrics::sub(CurrentMetrics::KafkaLibrdkafkaThreads, 1); - - std::lock_guard lock(self->thread_statuses_mutex); - const auto it = std::find_if(self->thread_statuses.begin(), self->thread_statuses.end(), [](const auto & thread_status_ptr) - { - return thread_status_ptr.get() == current_thread; - }); - if (it == self->thread_statuses.end()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "No thread status for this librdkafka thread."); - - self->thread_statuses.erase(it); - - return RD_KAFKA_RESP_ERR_NO_ERROR; - } - - static rd_kafka_resp_err_t rdKafkaOnNew(rd_kafka_t * rk, const rd_kafka_conf_t *, void * ctx, char * /*errstr*/, size_t /*errstr_size*/) - { - StorageKafka * self = reinterpret_cast(ctx); - rd_kafka_resp_err_t status; - - status = rd_kafka_interceptor_add_on_thread_start(rk, "init-thread", rdKafkaOnThreadStart, ctx); - if (status != RD_KAFKA_RESP_ERR_NO_ERROR) - { - LOG_ERROR(self->log, "Cannot set on thread start interceptor due to {} error", status); - return status; - } - - status = rd_kafka_interceptor_add_on_thread_exit(rk, "exit-thread", rdKafkaOnThreadExit, ctx); - if (status != RD_KAFKA_RESP_ERR_NO_ERROR) - LOG_ERROR(self->log, "Cannot set on thread exit interceptor due to {} error", status); - - return status; - } - - static rd_kafka_resp_err_t rdKafkaOnConfDup(rd_kafka_conf_t * new_conf, const rd_kafka_conf_t * /*old_conf*/, size_t /*filter_cnt*/, const char ** /*filter*/, void * ctx) - { - StorageKafka * self = reinterpret_cast(ctx); - rd_kafka_resp_err_t status; - - // cppkafka copies configuration multiple times - status = rd_kafka_conf_interceptor_add_on_conf_dup(new_conf, "init", rdKafkaOnConfDup, ctx); - if (status != RD_KAFKA_RESP_ERR_NO_ERROR) - { - LOG_ERROR(self->log, "Cannot set on conf dup interceptor due to {} error", status); - return status; - } - - status = rd_kafka_conf_interceptor_add_on_new(new_conf, "init", rdKafkaOnNew, ctx); - if (status != RD_KAFKA_RESP_ERR_NO_ERROR) - LOG_ERROR(self->log, "Cannot set on conf new interceptor due to {} error", status); - - return status; - } -}; - class ReadFromStorageKafka final : public ReadFromStreamLikeEngine { public: @@ -241,182 +135,6 @@ private: StorageSnapshotPtr storage_snapshot; }; -namespace -{ - const String CONFIG_KAFKA_TAG = "kafka"; - const String CONFIG_KAFKA_TOPIC_TAG = "kafka_topic"; - const String CONFIG_KAFKA_CONSUMER_TAG = "consumer"; - const String CONFIG_KAFKA_PRODUCER_TAG = "producer"; - const String CONFIG_NAME_TAG = "name"; - - void setKafkaConfigValue(cppkafka::Configuration & kafka_config, const String & key, const String & value) - { - /// "log_level" has valid underscore, the remaining librdkafka setting use dot.separated.format which isn't acceptable for XML. - /// See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md - const String setting_name_in_kafka_config = (key == "log_level") ? key : boost::replace_all_copy(key, "_", "."); - kafka_config.set(setting_name_in_kafka_config, value); - } - - void loadConfigProperty(cppkafka::Configuration & kafka_config, const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const String & tag) - { - const String property_path = config_prefix + "." + tag; - const String property_value = config.getString(property_path); - - setKafkaConfigValue(kafka_config, tag, property_value); - } - - void loadNamedCollectionConfig(cppkafka::Configuration & kafka_config, const String & collection_name, const String & config_prefix) - { - const auto & collection = NamedCollectionFactory::instance().get(collection_name); - for (const auto & key : collection->getKeys(-1, config_prefix)) - { - // Cut prefix with '.' before actual config tag. - const auto param_name = key.substr(config_prefix.size() + 1); - setKafkaConfigValue(kafka_config, param_name, collection->get(key)); - } - } - - void loadLegacyTopicConfig(cppkafka::Configuration & kafka_config, const Poco::Util::AbstractConfiguration & config, const String & collection_name, const String & config_prefix) - { - if (!collection_name.empty()) - { - loadNamedCollectionConfig(kafka_config, collection_name, config_prefix); - return; - } - - Poco::Util::AbstractConfiguration::Keys tags; - config.keys(config_prefix, tags); - - for (const auto & tag : tags) - { - loadConfigProperty(kafka_config, config, config_prefix, tag); - } - } - - /// Read server configuration into cppkafa configuration, used by new per-topic configuration - void loadTopicConfig(cppkafka::Configuration & kafka_config, const Poco::Util::AbstractConfiguration & config, const String & collection_name, const String & config_prefix, const String & topic) - { - if (!collection_name.empty()) - { - const auto topic_prefix = fmt::format("{}.{}", config_prefix, CONFIG_KAFKA_TOPIC_TAG); - const auto & collection = NamedCollectionFactory::instance().get(collection_name); - for (const auto & key : collection->getKeys(1, config_prefix)) - { - /// Only consider key . Multiple occurrences given as "kafka_topic", "kafka_topic[1]", etc. - if (!key.starts_with(topic_prefix)) - continue; - - const String kafka_topic_path = config_prefix + "." + key; - const String kafka_topic_name_path = kafka_topic_path + "." + CONFIG_NAME_TAG; - if (topic == collection->get(kafka_topic_name_path)) - /// Found it! Now read the per-topic configuration into cppkafka. - loadNamedCollectionConfig(kafka_config, collection_name, kafka_topic_path); - } - } - else - { - /// Read all tags one level below - Poco::Util::AbstractConfiguration::Keys tags; - config.keys(config_prefix, tags); - - for (const auto & tag : tags) - { - if (tag == CONFIG_NAME_TAG) - continue; // ignore , it is used to match topic configurations - loadConfigProperty(kafka_config, config, config_prefix, tag); - } - } - } - - /// Read server configuration into cppkafka configuration, used by global configuration and by legacy per-topic configuration - void loadFromConfig(cppkafka::Configuration & kafka_config, const Poco::Util::AbstractConfiguration & config, const String & collection_name, const String & config_prefix, const Names & topics) - { - if (!collection_name.empty()) - { - loadNamedCollectionConfig(kafka_config, collection_name, config_prefix); - return; - } - - /// Read all tags one level below - Poco::Util::AbstractConfiguration::Keys tags; - config.keys(config_prefix, tags); - - for (const auto & tag : tags) - { - if (tag == CONFIG_KAFKA_PRODUCER_TAG || tag == CONFIG_KAFKA_CONSUMER_TAG) - /// Do not load consumer/producer properties, since they should be separated by different configuration objects. - continue; - - if (tag.starts_with(CONFIG_KAFKA_TOPIC_TAG)) /// multiple occurrences given as "kafka_topic", "kafka_topic[1]", etc. - { - // Update consumer topic-specific configuration (new syntax). Example with topics "football" and "baseball": - // - // - // football - // 250 - // 5000 - // - // - // baseball - // 300 - // 2000 - // - // - // Advantages: The period restriction no longer applies (e.g. sports.football will work), everything - // Kafka-related is below . - for (const auto & topic : topics) - { - /// Read topic name between ... - const String kafka_topic_path = config_prefix + "." + tag; - const String kafka_topic_name_path = kafka_topic_path + "." + CONFIG_NAME_TAG; - const String topic_name = config.getString(kafka_topic_name_path); - - if (topic_name != topic) - continue; - loadTopicConfig(kafka_config, config, collection_name, kafka_topic_path, topic); - } - continue; - } - if (tag.starts_with(CONFIG_KAFKA_TAG)) - /// skip legacy configuration per topic e.g. . - /// it will be processed is a separate function - continue; - // Update configuration from the configuration. Example: - // - // 250 - // 100000 - // - loadConfigProperty(kafka_config, config, config_prefix, tag); - } - } - - void loadLegacyConfigSyntax(cppkafka::Configuration & kafka_config, const Poco::Util::AbstractConfiguration & config, const String & collection_name, const String & prefix, const Names & topics) - { - for (const auto & topic : topics) - { - const String kafka_topic_path = prefix + "." + CONFIG_KAFKA_TAG + "_" + topic; - loadLegacyTopicConfig(kafka_config, config, collection_name, kafka_topic_path); - } - } - - void loadConsumerConfig(cppkafka::Configuration & kafka_config, const Poco::Util::AbstractConfiguration & config, const String & collection_name, const String & prefix, const Names & topics) - { - const String consumer_path = prefix + "." + CONFIG_KAFKA_CONSUMER_TAG; - loadLegacyConfigSyntax(kafka_config, config, collection_name, prefix, topics); - // A new syntax has higher priority - loadFromConfig(kafka_config, config, collection_name, consumer_path, topics); - } - - void loadProducerConfig(cppkafka::Configuration & kafka_config, const Poco::Util::AbstractConfiguration & config, const String & collection_name, const String & prefix, const Names & topics) - { - const String producer_path = prefix + "." + CONFIG_KAFKA_PRODUCER_TAG; - loadLegacyConfigSyntax(kafka_config, config, collection_name, prefix, topics); - // A new syntax has higher priority - loadFromConfig(kafka_config, config, collection_name, producer_path, topics); - - } -} - StorageKafka::StorageKafka( const StorageID & table_id_, ContextPtr context_, @@ -428,19 +146,20 @@ StorageKafka::StorageKafka( , WithContext(context_->getGlobalContext()) , kafka_settings(std::move(kafka_settings_)) , macros_info{.table_id = table_id_} - , topics(parseTopics(getContext()->getMacros()->expand(kafka_settings->kafka_topic_list.value, macros_info))) + , topics(StorageKafkaUtils::parseTopics(getContext()->getMacros()->expand(kafka_settings->kafka_topic_list.value, macros_info))) , brokers(getContext()->getMacros()->expand(kafka_settings->kafka_broker_list.value, macros_info)) , group(getContext()->getMacros()->expand(kafka_settings->kafka_group_name.value, macros_info)) , client_id( - kafka_settings->kafka_client_id.value.empty() ? getDefaultClientId(table_id_) - : getContext()->getMacros()->expand(kafka_settings->kafka_client_id.value, macros_info)) + kafka_settings->kafka_client_id.value.empty() + ? StorageKafkaUtils::getDefaultClientId(table_id_) + : getContext()->getMacros()->expand(kafka_settings->kafka_client_id.value, macros_info)) , format_name(getContext()->getMacros()->expand(kafka_settings->kafka_format.value)) , max_rows_per_message(kafka_settings->kafka_max_rows_per_message.value) , schema_name(getContext()->getMacros()->expand(kafka_settings->kafka_schema.value, macros_info)) , num_consumers(kafka_settings->kafka_num_consumers.value) , log(getLogger("StorageKafka (" + table_id_.table_name + ")")) , intermediate_commit(kafka_settings->kafka_commit_every_batch.value) - , settings_adjustments(createSettingsAdjustments()) + , settings_adjustments(StorageKafkaUtils::createSettingsAdjustments(*kafka_settings, schema_name)) , thread_per_consumer(kafka_settings->kafka_thread_per_consumer.value) , collection_name(collection_name_) { @@ -456,7 +175,7 @@ StorageKafka::StorageKafka( storage_metadata.setColumns(columns_); storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); - setVirtuals(createVirtuals(kafka_settings->kafka_handle_error_mode)); + setVirtuals(StorageKafkaUtils::createVirtuals(kafka_settings->kafka_handle_error_mode)); auto task_count = thread_per_consumer ? num_consumers : 1; for (size_t i = 0; i < task_count; ++i) @@ -481,76 +200,6 @@ StorageKafka::StorageKafka( StorageKafka::~StorageKafka() = default; -VirtualColumnsDescription StorageKafka::createVirtuals(StreamingHandleErrorMode handle_error_mode) -{ - VirtualColumnsDescription desc; - - desc.addEphemeral("_topic", std::make_shared(std::make_shared()), ""); - desc.addEphemeral("_key", std::make_shared(), ""); - desc.addEphemeral("_offset", std::make_shared(), ""); - desc.addEphemeral("_partition", std::make_shared(), ""); - desc.addEphemeral("_timestamp", std::make_shared(std::make_shared()), ""); - desc.addEphemeral("_timestamp_ms", std::make_shared(std::make_shared(3)), ""); - desc.addEphemeral("_headers.name", std::make_shared(std::make_shared()), ""); - desc.addEphemeral("_headers.value", std::make_shared(std::make_shared()), ""); - - if (handle_error_mode == StreamingHandleErrorMode::STREAM) - { - desc.addEphemeral("_raw_message", std::make_shared(), ""); - desc.addEphemeral("_error", std::make_shared(), ""); - } - - return desc; -} - -SettingsChanges StorageKafka::createSettingsAdjustments() -{ - SettingsChanges result; - // Needed for backward compatibility - if (!kafka_settings->input_format_skip_unknown_fields.changed) - { - // Always skip unknown fields regardless of the context (JSON or TSKV) - kafka_settings->input_format_skip_unknown_fields = true; - } - - if (!kafka_settings->input_format_allow_errors_ratio.changed) - { - kafka_settings->input_format_allow_errors_ratio = 0.; - } - - if (!kafka_settings->input_format_allow_errors_num.changed) - { - kafka_settings->input_format_allow_errors_num = kafka_settings->kafka_skip_broken_messages.value; - } - - if (!schema_name.empty()) - result.emplace_back("format_schema", schema_name); - - for (const auto & setting : *kafka_settings) - { - const auto & name = setting.getName(); - if (name.find("kafka_") == std::string::npos) - result.emplace_back(name, setting.getValue()); - } - return result; -} - -Names StorageKafka::parseTopics(String topic_list) -{ - Names result; - boost::split(result,topic_list,[](char c){ return c == ','; }); - for (String & topic : result) - { - boost::trim(topic); - } - return result; -} - -String StorageKafka::getDefaultClientId(const StorageID & table_id_) -{ - return fmt::format("{}-{}-{}-{}", VERSION_NAME, getFQDNOrHostName(), table_id_.database_name, table_id_.table_name); -} - void StorageKafka::read( QueryPlan & query_plan, const Names & column_names, @@ -751,65 +400,26 @@ KafkaConsumerPtr StorageKafka::createKafkaConsumer(size_t consumer_number) topics); return kafka_consumer_ptr; } - cppkafka::Configuration StorageKafka::getConsumerConfiguration(size_t consumer_number) { - cppkafka::Configuration conf; - - conf.set("metadata.broker.list", brokers); - conf.set("group.id", group); - if (num_consumers > 1) - { - conf.set("client.id", fmt::format("{}-{}", client_id, consumer_number)); - } - else - { - conf.set("client.id", client_id); - } - conf.set("client.software.name", VERSION_NAME); - conf.set("client.software.version", VERSION_DESCRIBE); - conf.set("auto.offset.reset", "earliest"); // If no offset stored for this group, read all messages from the start - - // that allows to prevent fast draining of the librdkafka queue - // during building of single insert block. Improves performance - // significantly, but may lead to bigger memory consumption. - size_t default_queued_min_messages = 100000; // must be greater than or equal to default - size_t max_allowed_queued_min_messages = 10000000; // must be less than or equal to max allowed value - conf.set("queued.min.messages", std::min(std::max(getMaxBlockSize(), default_queued_min_messages), max_allowed_queued_min_messages)); - - updateGlobalConfiguration(conf); - updateConsumerConfiguration(conf); - - // those settings should not be changed by users. - conf.set("enable.auto.commit", "false"); // We manually commit offsets after a stream successfully finished - conf.set("enable.auto.offset.store", "false"); // Update offset automatically - to commit them all at once. - conf.set("enable.partition.eof", "false"); // Ignore EOF messages - - for (auto & property : conf.get_all()) - { - LOG_TRACE(log, "Consumer set property {}:{}", property.first, property.second); - } - - return conf; + KafkaConfigLoader::ConsumerConfigParams params{ + {getContext()->getConfigRef(), collection_name, topics, log}, + brokers, + group, + num_consumers > 1, + consumer_number, + client_id, + getMaxBlockSize()}; + return KafkaConfigLoader::getConsumerConfiguration(*this, params); } cppkafka::Configuration StorageKafka::getProducerConfiguration() { - cppkafka::Configuration conf; - conf.set("metadata.broker.list", brokers); - conf.set("client.id", client_id); - conf.set("client.software.name", VERSION_NAME); - conf.set("client.software.version", VERSION_DESCRIBE); - - updateGlobalConfiguration(conf); - updateProducerConfiguration(conf); - - for (auto & property : conf.get_all()) - { - LOG_TRACE(log, "Producer set property {}:{}", property.first, property.second); - } - - return conf; + KafkaConfigLoader::ProducerConfigParams params{ + {getContext()->getConfigRef(), collection_name, topics, log}, + brokers, + client_id}; + return KafkaConfigLoader::getProducerConfiguration(*this, params); } void StorageKafka::cleanConsumers() @@ -887,126 +497,6 @@ size_t StorageKafka::getPollTimeoutMillisecond() const : getContext()->getSettingsRef().stream_poll_timeout_ms.totalMilliseconds(); } -void StorageKafka::updateGlobalConfiguration(cppkafka::Configuration & kafka_config) -{ - const auto & config = getContext()->getConfigRef(); - loadFromConfig(kafka_config, config, collection_name, CONFIG_KAFKA_TAG, topics); - -#if USE_KRB5 - if (kafka_config.has_property("sasl.kerberos.kinit.cmd")) - LOG_WARNING(log, "sasl.kerberos.kinit.cmd configuration parameter is ignored."); - - kafka_config.set("sasl.kerberos.kinit.cmd",""); - kafka_config.set("sasl.kerberos.min.time.before.relogin","0"); - - if (kafka_config.has_property("sasl.kerberos.keytab") && kafka_config.has_property("sasl.kerberos.principal")) - { - String keytab = kafka_config.get("sasl.kerberos.keytab"); - String principal = kafka_config.get("sasl.kerberos.principal"); - LOG_DEBUG(log, "Running KerberosInit"); - try - { - kerberosInit(keytab,principal); - } - catch (const Exception & e) - { - LOG_ERROR(log, "KerberosInit failure: {}", getExceptionMessage(e, false)); - } - LOG_DEBUG(log, "Finished KerberosInit"); - } -#else // USE_KRB5 - if (kafka_config.has_property("sasl.kerberos.keytab") || kafka_config.has_property("sasl.kerberos.principal")) - LOG_WARNING(log, "Ignoring Kerberos-related parameters because ClickHouse was built without krb5 library support."); -#endif // USE_KRB5 - - // No need to add any prefix, messages can be distinguished - kafka_config.set_log_callback( - [this](cppkafka::KafkaHandleBase & handle, int level, const std::string & facility, const std::string & message) - { - auto [poco_level, client_logs_level] = parseSyslogLevel(level); - const auto & kafka_object_config = handle.get_configuration(); - const std::string client_id_key{"client.id"}; - chassert(kafka_object_config.has_property(client_id_key) && "Kafka configuration doesn't have expected client.id set"); - LOG_IMPL( - log, - client_logs_level, - poco_level, - "[client.id:{}] [rdk:{}] {}", - kafka_object_config.get(client_id_key), - facility, - message); - }); - - /// NOTE: statistics should be consumed, otherwise it creates too much - /// entries in the queue, that leads to memory leak and slow shutdown. - if (!kafka_config.has_property("statistics.interval.ms")) - { - // every 3 seconds by default. set to 0 to disable. - kafka_config.set("statistics.interval.ms", "3000"); - } - - // Configure interceptor to change thread name - // - // TODO: add interceptors support into the cppkafka. - // XXX: rdkafka uses pthread_set_name_np(), but glibc-compatibliity overrides it to noop. - { - // This should be safe, since we wait the rdkafka object anyway. - void * self = static_cast(this); - - int status; - - status = rd_kafka_conf_interceptor_add_on_new(kafka_config.get_handle(), - "init", StorageKafkaInterceptors::rdKafkaOnNew, self); - if (status != RD_KAFKA_RESP_ERR_NO_ERROR) - LOG_ERROR(log, "Cannot set new interceptor due to {} error", status); - - // cppkafka always copy the configuration - status = rd_kafka_conf_interceptor_add_on_conf_dup(kafka_config.get_handle(), - "init", StorageKafkaInterceptors::rdKafkaOnConfDup, self); - if (status != RD_KAFKA_RESP_ERR_NO_ERROR) - LOG_ERROR(log, "Cannot set dup conf interceptor due to {} error", status); - } -} - -void StorageKafka::updateConsumerConfiguration(cppkafka::Configuration & kafka_config) -{ - const auto & config = getContext()->getConfigRef(); - loadConsumerConfig(kafka_config, config, collection_name, CONFIG_KAFKA_TAG, topics); -} - -void StorageKafka::updateProducerConfiguration(cppkafka::Configuration & kafka_config) -{ - const auto & config = getContext()->getConfigRef(); - loadProducerConfig(kafka_config, config, collection_name, CONFIG_KAFKA_TAG, topics); -} - -bool StorageKafka::checkDependencies(const StorageID & table_id) -{ - // Check if all dependencies are attached - auto view_ids = DatabaseCatalog::instance().getDependentViews(table_id); - if (view_ids.empty()) - return true; - - // Check the dependencies are ready? - for (const auto & view_id : view_ids) - { - auto view = DatabaseCatalog::instance().tryGetTable(view_id, getContext()); - if (!view) - return false; - - // If it materialized view, check it's target table - auto * materialized_view = dynamic_cast(view.get()); - if (materialized_view && !materialized_view->tryGetTargetTable()) - return false; - - // Check all its dependencies - if (!checkDependencies(view_id)) - return false; - } - - return true; -} - void StorageKafka::threadFunc(size_t idx) { assert(idx < tasks.size()); @@ -1027,7 +517,7 @@ void StorageKafka::threadFunc(size_t idx) // Keep streaming as long as there are attached views and streaming is not cancelled while (!task->stream_cancelled) { - if (!checkDependencies(table_id)) + if (!StorageKafkaUtils::checkDependencies(table_id, getContext())) break; LOG_DEBUG(log, "Started streaming to {} attached views", num_views); @@ -1109,7 +599,7 @@ bool StorageKafka::streamToViews() /* allow_materialized */ false, /* no_squash */ true, /* no_destination */ true, - /* async_isnert */ false); + /* async_insert */ false); auto block_io = interpreter.execute(); // Create a stream for each consumer and join them in a union stream @@ -1167,164 +657,4 @@ bool StorageKafka::streamToViews() return some_stream_is_stalled; } - -void registerStorageKafka(StorageFactory & factory) -{ - auto creator_fn = [](const StorageFactory::Arguments & args) - { - ASTs & engine_args = args.engine_args; - size_t args_count = engine_args.size(); - const bool has_settings = args.storage_def->settings; - - auto kafka_settings = std::make_unique(); - String collection_name; - if (auto named_collection = tryGetNamedCollectionWithOverrides(args.engine_args, args.getLocalContext())) - { - for (const auto & setting : kafka_settings->all()) - { - const auto & setting_name = setting.getName(); - if (named_collection->has(setting_name)) - kafka_settings->set(setting_name, named_collection->get(setting_name)); - } - collection_name = assert_cast(args.engine_args[0].get())->name(); - } - - if (has_settings) - { - kafka_settings->loadFromQuery(*args.storage_def); - } - - // Check arguments and settings - #define CHECK_KAFKA_STORAGE_ARGUMENT(ARG_NUM, PAR_NAME, EVAL) \ - /* One of the four required arguments is not specified */ \ - if (args_count < (ARG_NUM) && (ARG_NUM) <= 4 && \ - !kafka_settings->PAR_NAME.changed) \ - { \ - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,\ - "Required parameter '{}' " \ - "for storage Kafka not specified", \ - #PAR_NAME); \ - } \ - if (args_count >= (ARG_NUM)) \ - { \ - /* The same argument is given in two places */ \ - if (has_settings && \ - kafka_settings->PAR_NAME.changed) \ - { \ - throw Exception(ErrorCodes::BAD_ARGUMENTS, \ - "The argument №{} of storage Kafka " \ - "and the parameter '{}' " \ - "in SETTINGS cannot be specified at the same time", \ - #ARG_NUM, #PAR_NAME); \ - } \ - /* move engine args to settings */ \ - else \ - { \ - if ((EVAL) == 1) \ - { \ - engine_args[(ARG_NUM)-1] = \ - evaluateConstantExpressionAsLiteral( \ - engine_args[(ARG_NUM)-1], \ - args.getLocalContext()); \ - } \ - if ((EVAL) == 2) \ - { \ - engine_args[(ARG_NUM)-1] = \ - evaluateConstantExpressionOrIdentifierAsLiteral( \ - engine_args[(ARG_NUM)-1], \ - args.getLocalContext()); \ - } \ - kafka_settings->PAR_NAME = \ - engine_args[(ARG_NUM)-1]->as().value; \ - } \ - } - - /** Arguments of engine is following: - * - Kafka broker list - * - List of topics - * - Group ID (may be a constant expression with a string result) - * - Message format (string) - * - Row delimiter - * - Schema (optional, if the format supports it) - * - Number of consumers - * - Max block size for background consumption - * - Skip (at least) unreadable messages number - * - Do intermediate commits when the batch consumed and handled - */ - - /* 0 = raw, 1 = evaluateConstantExpressionAsLiteral, 2=evaluateConstantExpressionOrIdentifierAsLiteral */ - /// In case of named collection we already validated the arguments. - if (collection_name.empty()) - { - CHECK_KAFKA_STORAGE_ARGUMENT(1, kafka_broker_list, 0) - CHECK_KAFKA_STORAGE_ARGUMENT(2, kafka_topic_list, 1) - CHECK_KAFKA_STORAGE_ARGUMENT(3, kafka_group_name, 2) - CHECK_KAFKA_STORAGE_ARGUMENT(4, kafka_format, 2) - CHECK_KAFKA_STORAGE_ARGUMENT(5, kafka_row_delimiter, 2) - CHECK_KAFKA_STORAGE_ARGUMENT(6, kafka_schema, 2) - CHECK_KAFKA_STORAGE_ARGUMENT(7, kafka_num_consumers, 0) - CHECK_KAFKA_STORAGE_ARGUMENT(8, kafka_max_block_size, 0) - CHECK_KAFKA_STORAGE_ARGUMENT(9, kafka_skip_broken_messages, 0) - CHECK_KAFKA_STORAGE_ARGUMENT(10, kafka_commit_every_batch, 0) - CHECK_KAFKA_STORAGE_ARGUMENT(11, kafka_client_id, 2) - CHECK_KAFKA_STORAGE_ARGUMENT(12, kafka_poll_timeout_ms, 0) - CHECK_KAFKA_STORAGE_ARGUMENT(13, kafka_flush_interval_ms, 0) - CHECK_KAFKA_STORAGE_ARGUMENT(14, kafka_thread_per_consumer, 0) - CHECK_KAFKA_STORAGE_ARGUMENT(15, kafka_handle_error_mode, 0) - CHECK_KAFKA_STORAGE_ARGUMENT(16, kafka_commit_on_select, 0) - CHECK_KAFKA_STORAGE_ARGUMENT(17, kafka_max_rows_per_message, 0) - } - - #undef CHECK_KAFKA_STORAGE_ARGUMENT - - auto num_consumers = kafka_settings->kafka_num_consumers.value; - auto max_consumers = std::max(getNumberOfPhysicalCPUCores(), 16); - - if (!args.getLocalContext()->getSettingsRef().kafka_disable_num_consumers_limit && num_consumers > max_consumers) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "The number of consumers can not be bigger than {}. " - "A single consumer can read any number of partitions. " - "Extra consumers are relatively expensive, " - "and using a lot of them can lead to high memory and CPU usage. " - "To achieve better performance " - "of getting data from Kafka, consider using a setting kafka_thread_per_consumer=1, " - "and ensure you have enough threads " - "in MessageBrokerSchedulePool (background_message_broker_schedule_pool_size). " - "See also https://clickhouse.com/docs/en/integrations/kafka#tuning-performance", max_consumers); - } - else if (num_consumers < 1) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Number of consumers can not be lower than 1"); - } - - if (kafka_settings->kafka_max_block_size.changed && kafka_settings->kafka_max_block_size.value < 1) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "kafka_max_block_size can not be lower than 1"); - } - - if (kafka_settings->kafka_poll_max_batch_size.changed && kafka_settings->kafka_poll_max_batch_size.value < 1) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "kafka_poll_max_batch_size can not be lower than 1"); - } - NamesAndTypesList supported_columns; - for (const auto & column : args.columns) - { - if (column.default_desc.kind == ColumnDefaultKind::Alias) - supported_columns.emplace_back(column.name, column.type); - if (column.default_desc.kind == ColumnDefaultKind::Default && !column.default_desc.expression) - supported_columns.emplace_back(column.name, column.type); - } - // Kafka engine allows only ordinary columns without default expression or alias columns. - if (args.columns.getAll() != supported_columns) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "KafkaEngine doesn't support DEFAULT/MATERIALIZED/EPHEMERAL expressions for columns. " - "See https://clickhouse.com/docs/en/engines/table-engines/integrations/kafka/#configuration"); - } - - return std::make_shared(args.table_id, args.getContext(), args.columns, args.comment, std::move(kafka_settings), collection_name); - }; - - factory.registerStorage("Kafka", creator_fn, StorageFactory::StorageFeatures{ .supports_settings = true, }); -} - } diff --git a/src/Storages/Kafka/StorageKafka.h b/src/Storages/Kafka/StorageKafka.h index 31e1a6076b6..966d818d675 100644 --- a/src/Storages/Kafka/StorageKafka.h +++ b/src/Storages/Kafka/StorageKafka.h @@ -23,7 +23,8 @@ class ReadFromStorageKafka; class StorageSystemKafkaConsumers; class ThreadStatus; -struct StorageKafkaInterceptors; +template +struct KafkaInterceptors; using KafkaConsumerPtr = std::shared_ptr; using ConsumerPtr = std::shared_ptr; @@ -33,7 +34,8 @@ using ConsumerPtr = std::shared_ptr; */ class StorageKafka final : public IStorage, WithContext { - friend struct StorageKafkaInterceptors; + using KafkaInterceptors = KafkaInterceptors; + friend KafkaInterceptors; public: StorageKafka( @@ -133,7 +135,6 @@ private: std::mutex thread_statuses_mutex; std::list> thread_statuses; - SettingsChanges createSettingsAdjustments(); /// Creates KafkaConsumer object without real consumer (cppkafka::Consumer) KafkaConsumerPtr createKafkaConsumer(size_t consumer_number); /// Returns full consumer related configuration, also the configuration @@ -148,33 +149,15 @@ private: std::atomic shutdown_called = false; - // Load Kafka global configuration - // https://github.com/confluentinc/librdkafka/blob/master/CONFIGURATION.md#global-configuration-properties - void updateGlobalConfiguration(cppkafka::Configuration & kafka_config); - // Load Kafka properties from consumer configuration - // NOTE: librdkafka allow to set a consumer property to a producer and vice versa, - // but a warning will be generated e.g: - // "Configuration property session.timeout.ms is a consumer property and - // will be ignored by this producer instance" - void updateConsumerConfiguration(cppkafka::Configuration & kafka_config); - // Load Kafka properties from producer configuration - void updateProducerConfiguration(cppkafka::Configuration & kafka_config); - void threadFunc(size_t idx); size_t getPollMaxBatchSize() const; size_t getMaxBlockSize() const; size_t getPollTimeoutMillisecond() const; - static Names parseTopics(String topic_list); - static String getDefaultClientId(const StorageID & table_id_); - bool streamToViews(); - bool checkDependencies(const StorageID & table_id); void cleanConsumers(); - - static VirtualColumnsDescription createVirtuals(StreamingHandleErrorMode handle_error_mode); }; } diff --git a/src/Storages/Kafka/StorageKafka2.cpp b/src/Storages/Kafka/StorageKafka2.cpp new file mode 100644 index 00000000000..3574b46e3b0 --- /dev/null +++ b/src/Storages/Kafka/StorageKafka2.cpp @@ -0,0 +1,1289 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +namespace CurrentMetrics +{ +// TODO: Add proper metrics, similar to old StorageKafka +extern const Metric KafkaBackgroundReads; +extern const Metric KafkaWrites; +} + +namespace ProfileEvents +{ +extern const Event KafkaBackgroundReads; +extern const Event KafkaMessagesRead; +extern const Event KafkaMessagesFailed; +extern const Event KafkaRowsRead; +extern const Event KafkaWrites; +} + + +namespace DB +{ + +namespace fs = std::filesystem; + +namespace ErrorCodes +{ +extern const int NOT_IMPLEMENTED; +extern const int LOGICAL_ERROR; +extern const int REPLICA_ALREADY_EXISTS; +extern const int TABLE_IS_DROPPED; +extern const int NO_ZOOKEEPER; +extern const int REPLICA_IS_ALREADY_ACTIVE; +} + +namespace +{ +constexpr auto MAX_FAILED_POLL_ATTEMPTS = 10; +constexpr auto MAX_TIME_TO_WAIT_FOR_ASSIGNMENT_MS = 15000; +} + +StorageKafka2::StorageKafka2( + const StorageID & table_id_, + ContextPtr context_, + const ColumnsDescription & columns_, + const String & comment, + std::unique_ptr kafka_settings_, + const String & collection_name_) + : IStorage(table_id_) + , WithContext(context_->getGlobalContext()) + , keeper(getContext()->getZooKeeper()) + , keeper_path(kafka_settings_->kafka_keeper_path.value) + , replica_path(keeper_path + "/replicas/" + kafka_settings_->kafka_replica_name.value) + , kafka_settings(std::move(kafka_settings_)) + , macros_info{.table_id = table_id_} + , topics(StorageKafkaUtils::parseTopics(getContext()->getMacros()->expand(kafka_settings->kafka_topic_list.value, macros_info))) + , brokers(getContext()->getMacros()->expand(kafka_settings->kafka_broker_list.value, macros_info)) + , group(getContext()->getMacros()->expand(kafka_settings->kafka_group_name.value, macros_info)) + , client_id( + kafka_settings->kafka_client_id.value.empty() + ? StorageKafkaUtils::getDefaultClientId(table_id_) + : getContext()->getMacros()->expand(kafka_settings->kafka_client_id.value, macros_info)) + , format_name(getContext()->getMacros()->expand(kafka_settings->kafka_format.value)) + , max_rows_per_message(kafka_settings->kafka_max_rows_per_message.value) + , schema_name(getContext()->getMacros()->expand(kafka_settings->kafka_schema.value, macros_info)) + , num_consumers(kafka_settings->kafka_num_consumers.value) + , log(getLogger("StorageKafka2 (" + table_id_.getNameForLogs() + ")")) + , semaphore(0, static_cast(num_consumers)) + , settings_adjustments(StorageKafkaUtils::createSettingsAdjustments(*kafka_settings, schema_name)) + , thread_per_consumer(kafka_settings->kafka_thread_per_consumer.value) + , collection_name(collection_name_) + , active_node_identifier(toString(ServerUUID::get())) +{ + if (kafka_settings->kafka_num_consumers > 1 && !thread_per_consumer) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "With multiple consumers, it is required to use `kafka_thread_per_consumer` setting"); + + if (kafka_settings->kafka_handle_error_mode == StreamingHandleErrorMode::STREAM) + { + kafka_settings->input_format_allow_errors_num = 0; + kafka_settings->input_format_allow_errors_ratio = 0; + } + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(columns_); + storage_metadata.setComment(comment); + setInMemoryMetadata(storage_metadata); + setVirtuals(StorageKafkaUtils::createVirtuals(kafka_settings->kafka_handle_error_mode)); + + auto task_count = thread_per_consumer ? num_consumers : 1; + for (size_t i = 0; i < task_count; ++i) + { + auto task = getContext()->getMessageBrokerSchedulePool().createTask(log->name(), [this, i] { threadFunc(i); }); + task->deactivate(); + tasks.emplace_back(std::make_shared(std::move(task))); + } + + const auto first_replica = createTableIfNotExists(); + + if (!first_replica) + createReplica(); + + activating_task = getContext()->getSchedulePool().createTask(log->name() + "(activating task)", [this]() { activateAndReschedule(); }); + activating_task->deactivate(); +} + +void StorageKafka2::partialShutdown() +{ + // This is called in a background task within a catch block, thus this function shouldn't throw + LOG_TRACE(log, "Cancelling streams"); + for (auto & task : tasks) + { + task->stream_cancelled = true; + } + + LOG_TRACE(log, "Waiting for cleanup"); + for (auto & task : tasks) + { + task->holder->deactivate(); + } + is_active = false; +} + +bool StorageKafka2::activate() +{ + LOG_TEST(log, "Activate task"); + if (is_active && !getZooKeeper()->expired()) + { + LOG_TEST(log, "No need to activate"); + return true; + } + + if (!is_active) + { + LOG_WARNING(log, "Table was not active. Will try to activate it"); + } + else if (getZooKeeper()->expired()) + { + LOG_WARNING(log, "ZooKeeper session has expired. Switching to a new session"); + partialShutdown(); + } + else + { + UNREACHABLE(); + } + + try + { + setZooKeeper(); + } + catch (const Coordination::Exception &) + { + /// The exception when you try to zookeeper_init usually happens if DNS does not work or the connection with ZK fails + tryLogCurrentException(log, "Failed to establish a new ZK connection. Will try again"); + assert(!is_active); + return false; + } + + if (shutdown_called) + return false; + + auto activate_in_keeper = [this]() + { + try + { + auto zookeeper = getZooKeeper(); + + String is_active_path = fs::path(replica_path) / "is_active"; + zookeeper->deleteEphemeralNodeIfContentMatches(is_active_path, active_node_identifier); + + try + { + /// Simultaneously declare that this replica is active, and update the host. + zookeeper->create(is_active_path, active_node_identifier, zkutil::CreateMode::Ephemeral); + } + catch (const Coordination::Exception & e) + { + if (e.code == Coordination::Error::ZNODEEXISTS) + throw Exception( + ErrorCodes::REPLICA_IS_ALREADY_ACTIVE, + "Replica {} appears to be already active. If you're sure it's not, " + "try again in a minute or remove znode {}/is_active manually", + replica_path, + replica_path); + + throw; + } + replica_is_active_node = zkutil::EphemeralNodeHolder::existing(is_active_path, *zookeeper); + + return true; + } + catch (const Coordination::Exception & e) + { + replica_is_active_node = nullptr; + LOG_ERROR(log, "Couldn't start replica: {}. {}", e.what(), DB::getCurrentExceptionMessage(true)); + return false; + + } + catch (const Exception & e) + { + replica_is_active_node = nullptr; + if (e.code() != ErrorCodes::REPLICA_IS_ALREADY_ACTIVE) + throw; + + LOG_ERROR(log, "Couldn't start replica: {}. {}", e.what(), DB::getCurrentExceptionMessage(true)); + return false; + } + }; + + if (!activate_in_keeper()) + { + assert(!is_active); + return false; + } + + is_active = true; + + // Start the reader threads + for (auto & task : tasks) + { + task->stream_cancelled = false; + task->holder->activateAndSchedule(); + } + + LOG_DEBUG(log, "Table activated successfully"); + return true; +} + +void StorageKafka2::activateAndReschedule() +{ + if (shutdown_called) + return; + + /// It would be ideal to introduce a setting for this + constexpr static size_t check_period_ms = 60000; + /// In case of any exceptions we want to rerun the this task as fast as possible but we also don't want to keep retrying immediately + /// in a close loop (as fast as tasks can be processed), so we'll retry in between 100 and 10000 ms + const size_t backoff_ms = 100 * ((consecutive_activate_failures + 1) * (consecutive_activate_failures + 2)) / 2; + const size_t next_failure_retry_ms = std::min(size_t{10000}, backoff_ms); + + try + { + bool replica_is_active = activate(); + if (replica_is_active) + { + consecutive_activate_failures = 0; + activating_task->scheduleAfter(check_period_ms); + } + else + { + consecutive_activate_failures++; + activating_task->scheduleAfter(next_failure_retry_ms); + } + } + catch (...) + { + consecutive_activate_failures++; + activating_task->scheduleAfter(next_failure_retry_ms); + + /// We couldn't activate table let's set it into readonly mode if necessary + /// We do this after scheduling the task in case it throws + partialShutdown(); + tryLogCurrentException(log, "Failed to restart the table. Will try again"); + } +} + +void StorageKafka2::assertActive() const +{ + // TODO(antaljanosbenjamin): change LOGICAL_ERROR to something sensible + if (!is_active) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Table is not active (replica path: {})", replica_path); +} + + +Pipe StorageKafka2::read( + const Names & /*column_names */, + const StorageSnapshotPtr & /* storage_snapshot */, + SelectQueryInfo & /* query_info */, + ContextPtr /* local_context */, + QueryProcessingStage::Enum /* processed_stage */, + size_t /* max_block_size */, + size_t /* num_streams */) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Direct read from the new Kafka storage is not implemented"); +} + + +SinkToStoragePtr +StorageKafka2::write(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, bool /*async_insert*/) +{ + auto modified_context = Context::createCopy(local_context); + modified_context->applySettingsChanges(settings_adjustments); + + CurrentMetrics::Increment metric_increment{CurrentMetrics::KafkaWrites}; + ProfileEvents::increment(ProfileEvents::KafkaWrites); + + if (topics.size() > 1) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Can't write to Kafka table with multiple topics!"); + + cppkafka::Configuration conf = getProducerConfiguration(); + + const Settings & settings = getContext()->getSettingsRef(); + size_t poll_timeout = settings.stream_poll_timeout_ms.totalMilliseconds(); + const auto & header = metadata_snapshot->getSampleBlockNonMaterialized(); + + auto producer = std::make_unique( + std::make_shared(conf), topics[0], std::chrono::milliseconds(poll_timeout), shutdown_called, header); + + LOG_TRACE(log, "Kafka producer created"); + + size_t max_rows = max_rows_per_message; + /// Need for backward compatibility. + if (format_name == "Avro" && local_context->getSettingsRef().output_format_avro_rows_in_file.changed) + max_rows = local_context->getSettingsRef().output_format_avro_rows_in_file.value; + return std::make_shared(header, getFormatName(), max_rows, std::move(producer), getName(), modified_context); +} + +void StorageKafka2::startup() +{ + for (size_t i = 0; i < num_consumers; ++i) + { + try + { + consumers.push_back(ConsumerAndAssignmentInfo{.consumer = createConsumer(i), .keeper = getZooKeeper()}); + LOG_DEBUG(log, "Created #{} consumer", num_created_consumers); + ++num_created_consumers; + + consumers.back().consumer->subscribeIfNotSubscribedYet(); + } + catch (const cppkafka::Exception &) + { + tryLogCurrentException(log); + } + } + activating_task->activateAndSchedule(); +} + + +void StorageKafka2::shutdown(bool) +{ + shutdown_called = true; + activating_task->deactivate(); + partialShutdown(); + LOG_TRACE(log, "Closing consumers"); + consumers.clear(); + LOG_TRACE(log, "Consumers closed"); +} + +void StorageKafka2::drop() +{ + dropReplica(); +} + +KafkaConsumer2Ptr StorageKafka2::createConsumer(size_t consumer_number) +{ + // Create a consumer and subscribe to topics + auto consumer_impl = std::make_shared(getConsumerConfiguration(consumer_number)); + consumer_impl->set_destroy_flags(RD_KAFKA_DESTROY_F_NO_CONSUMER_CLOSE); + + /// NOTE: we pass |stream_cancelled| by reference here, so the buffers should not outlive the storage. + chassert((thread_per_consumer || num_consumers == 1) && "StorageKafka2 cannot handle multiple consumers on a single thread"); + auto & stream_cancelled = tasks[consumer_number]->stream_cancelled; + return std::make_shared( + consumer_impl, log, getPollMaxBatchSize(), getPollTimeoutMillisecond(), stream_cancelled, topics); + +} + + +cppkafka::Configuration StorageKafka2::getConsumerConfiguration(size_t consumer_number) +{ + KafkaConfigLoader::ConsumerConfigParams params{ + {getContext()->getConfigRef(), collection_name, topics, log}, + brokers, + group, + num_consumers > 1, + consumer_number, + client_id, + getMaxBlockSize()}; + auto kafka_config = KafkaConfigLoader::getConsumerConfiguration(*this, params); + // It is disabled, because in case of no materialized views are attached, it can cause live memory leak. To enable it, a similar cleanup mechanism must be introduced as for StorageKafka. + kafka_config.set("statistics.interval.ms", "0"); + return kafka_config; +} + +cppkafka::Configuration StorageKafka2::getProducerConfiguration() +{ + KafkaConfigLoader::ProducerConfigParams params{ + {getContext()->getConfigRef(), collection_name, topics, log}, + brokers, + client_id}; + return KafkaConfigLoader::getProducerConfiguration(*this, params); +} + +size_t StorageKafka2::getMaxBlockSize() const +{ + return kafka_settings->kafka_max_block_size.changed ? kafka_settings->kafka_max_block_size.value + : (getContext()->getSettingsRef().max_insert_block_size.value / num_consumers); +} + +size_t StorageKafka2::getPollMaxBatchSize() const +{ + size_t batch_size = kafka_settings->kafka_poll_max_batch_size.changed ? kafka_settings->kafka_poll_max_batch_size.value + : getContext()->getSettingsRef().max_block_size.value; + + return std::min(batch_size, getMaxBlockSize()); +} + +size_t StorageKafka2::getPollTimeoutMillisecond() const +{ + return kafka_settings->kafka_poll_timeout_ms.changed ? kafka_settings->kafka_poll_timeout_ms.totalMilliseconds() + : getContext()->getSettingsRef().stream_poll_timeout_ms.totalMilliseconds(); +} + +namespace +{ +const std::string lock_file_name{"lock"}; +const std::string commit_file_name{"committed"}; +const std::string intent_file_name{"intention"}; + +std::optional getNumber(zkutil::ZooKeeper & keeper, const fs::path & path) +{ + std::string result; + if (!keeper.tryGet(path, result)) + return std::nullopt; + + return DB::parse(result); +} +} + +bool StorageKafka2::createTableIfNotExists() +{ + // Heavily based on StorageReplicatedMergeTree::createTableIfNotExists + const auto my_keeper_path = fs::path(keeper_path); + const auto replicas_path = my_keeper_path / "replicas"; + + for (auto i = 0; i < 1000; ++i) + { + if (keeper->exists(replicas_path)) + { + LOG_DEBUG(log, "This table {} is already created, will add new replica", keeper_path); + return false; + } + + /// There are leftovers from incompletely dropped table. + if (keeper->exists(my_keeper_path / "dropped")) + { + /// This condition may happen when the previous drop attempt was not completed + /// or when table is dropped by another replica right now. + /// This is Ok because another replica is definitely going to drop the table. + + LOG_WARNING(log, "Removing leftovers from table {}", keeper_path); + String drop_lock_path = my_keeper_path / "dropped" / "lock"; + Coordination::Error code = keeper->tryCreate(drop_lock_path, "", zkutil::CreateMode::Ephemeral); + + if (code == Coordination::Error::ZNONODE || code == Coordination::Error::ZNODEEXISTS) + { + LOG_WARNING(log, "The leftovers from table {} were removed by another replica", keeper_path); + } + else if (code != Coordination::Error::ZOK) + { + throw Coordination::Exception::fromPath(code, drop_lock_path); + } + else + { + auto metadata_drop_lock = zkutil::EphemeralNodeHolder::existing(drop_lock_path, *keeper); + if (!removeTableNodesFromZooKeeper(keeper, metadata_drop_lock)) + { + /// Someone is recursively removing table right now, we cannot create new table until old one is removed + continue; + } + } + } + + keeper->createAncestors(keeper_path); + Coordination::Requests ops; + + ops.emplace_back(zkutil::makeCreateRequest(keeper_path, "", zkutil::CreateMode::Persistent)); + + const auto topics_path = my_keeper_path / "topics"; + ops.emplace_back(zkutil::makeCreateRequest(topics_path, "", zkutil::CreateMode::Persistent)); + + for (const auto & topic : topics) + { + LOG_DEBUG(log, "Creating path in keeper for topic {}", topic); + + const auto topic_path = topics_path / topic; + ops.emplace_back(zkutil::makeCreateRequest(topic_path, "", zkutil::CreateMode::Persistent)); + + const auto partitions_path = topic_path / "partitions"; + ops.emplace_back(zkutil::makeCreateRequest(partitions_path, "", zkutil::CreateMode::Persistent)); + } + + // Create the first replica + ops.emplace_back(zkutil::makeCreateRequest(replicas_path, "", zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest(replica_path, "", zkutil::CreateMode::Persistent)); + + + Coordination::Responses responses; + const auto code = keeper->tryMulti(ops, responses); + if (code == Coordination::Error::ZNODEEXISTS) + { + LOG_INFO(log, "It looks like the table {} was created by another replica at the same moment, will retry", keeper_path); + continue; + } + else if (code != Coordination::Error::ZOK) + { + zkutil::KeeperMultiException::check(code, ops, responses); + } + + LOG_INFO(log, "Table {} created successfully ", keeper_path); + + return true; + } + + throw Exception( + ErrorCodes::REPLICA_ALREADY_EXISTS, + "Cannot create table, because it is created concurrently every time or because " + "of wrong zookeeper_path or because of logical error"); +} + + +bool StorageKafka2::removeTableNodesFromZooKeeper(zkutil::ZooKeeperPtr keeper_to_use, const zkutil::EphemeralNodeHolder::Ptr & drop_lock) +{ + bool completely_removed = false; + + Strings children; + if (const auto code = keeper_to_use->tryGetChildren(keeper_path, children); code == Coordination::Error::ZNONODE) + throw Exception(ErrorCodes::LOGICAL_ERROR, "There is a race condition between creation and removal. It's a bug"); + + const auto my_keeper_path = fs::path(keeper_path); + for (const auto & child : children) + if (child != "dropped") + keeper_to_use->tryRemoveRecursive(my_keeper_path / child); + + Coordination::Requests ops; + Coordination::Responses responses; + ops.emplace_back(zkutil::makeRemoveRequest(drop_lock->getPath(), -1)); + ops.emplace_back(zkutil::makeRemoveRequest(my_keeper_path / "dropped", -1)); + ops.emplace_back(zkutil::makeRemoveRequest(my_keeper_path, -1)); + const auto code = keeper_to_use->tryMulti(ops, responses, /* check_session_valid */ true); + + if (code == Coordination::Error::ZNONODE) + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, "There is a race condition between creation and removal of replicated table. It's a bug"); + } + else if (code == Coordination::Error::ZNOTEMPTY) + { + LOG_ERROR( + log, + "Table was not completely removed from Keeper, {} still exists and may contain some garbage," + "but someone is removing it right now.", + keeper_path); + } + else if (code != Coordination::Error::ZOK) + { + /// It is still possible that ZooKeeper session is expired or server is killed in the middle of the delete operation. + zkutil::KeeperMultiException::check(code, ops, responses); + } + else + { + drop_lock->setAlreadyRemoved(); + completely_removed = true; + LOG_INFO(log, "Table {} was successfully removed from ZooKeeper", keeper_path); + } + + return completely_removed; +} + +void StorageKafka2::createReplica() +{ + LOG_INFO(log, "Creating replica {}", replica_path); + // TODO: This can cause issues if a new table is created with the same path. To make this work, we should store some metadata + // about the table to be able to identify that the same table is created, not a new one. + const auto code = keeper->tryCreate(replica_path, "", zkutil::CreateMode::Persistent); + + switch (code) + { + case Coordination::Error::ZNODEEXISTS: + LOG_INFO(log, "Replica {} already exists, will try to use it", replica_path); + break; + case Coordination::Error::ZOK: + LOG_INFO(log, "Replica {} created", replica_path); + break; + case Coordination::Error::ZNONODE: + throw Exception(ErrorCodes::TABLE_IS_DROPPED, "Table {} was suddenly removed", keeper_path); + default: + throw Coordination::Exception::fromPath(code, replica_path); + } +} + + +void StorageKafka2::dropReplica() +{ + LOG_INFO(log, "Trying to drop replica {}", replica_path); + auto my_keeper = getZooKeeperIfTableShutDown(); + + LOG_INFO(log, "Removing replica {}", replica_path); + + if (!my_keeper->exists(replica_path)) + { + LOG_INFO(log, "Removing replica {} does not exist", replica_path); + return; + } + + my_keeper->tryRemoveChildrenRecursive(replica_path); + + if (my_keeper->tryRemove(replica_path) != Coordination::Error::ZOK) + LOG_ERROR(log, "Replica was not completely removed from Keeper, {} still exists and may contain some garbage.", replica_path); + + /// Check that `zookeeper_path` exists: it could have been deleted by another replica after execution of previous line. + Strings replicas; + if (Coordination::Error::ZOK != my_keeper->tryGetChildren(keeper_path + "/replicas", replicas) || !replicas.empty()) + return; + + LOG_INFO(log, "{} is the last replica, will remove table", replica_path); + + /** At this moment, another replica can be created and we cannot remove the table. + * Try to remove /replicas node first. If we successfully removed it, + * it guarantees that we are the only replica that proceed to remove the table + * and no new replicas can be created after that moment (it requires the existence of /replicas node). + * and table cannot be recreated with new /replicas node on another servers while we are removing data, + * because table creation is executed in single transaction that will conflict with remaining nodes. + */ + + /// Node /dropped works like a lock that protects from concurrent removal of old table and creation of new table. + /// But recursive removal may fail in the middle of operation leaving some garbage in zookeeper_path, so + /// we remove it on table creation if there is /dropped node. Creating thread may remove /dropped node created by + /// removing thread, and it causes race condition if removing thread is not finished yet. + /// To avoid this we also create ephemeral child before starting recursive removal. + /// (The existence of child node does not allow to remove parent node). + Coordination::Requests ops; + Coordination::Responses responses; + fs::path my_keeper_path = keeper_path; + String drop_lock_path = my_keeper_path / "dropped" / "lock"; + ops.emplace_back(zkutil::makeRemoveRequest(my_keeper_path / "replicas", -1)); + ops.emplace_back(zkutil::makeCreateRequest(my_keeper_path / "dropped", "", zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest(drop_lock_path, "", zkutil::CreateMode::Ephemeral)); + Coordination::Error code = my_keeper->tryMulti(ops, responses); + + if (code == Coordination::Error::ZNONODE || code == Coordination::Error::ZNODEEXISTS) + { + LOG_WARNING(log, "Table {} is already started to be removing by another replica right now", replica_path); + } + else if (code == Coordination::Error::ZNOTEMPTY) + { + LOG_WARNING(log, "Another replica was suddenly created, will keep the table {}", replica_path); + } + else if (code != Coordination::Error::ZOK) + { + zkutil::KeeperMultiException::check(code, ops, responses); + } + else + { + auto drop_lock = zkutil::EphemeralNodeHolder::existing(drop_lock_path, *my_keeper); + LOG_INFO(log, "Removing table {} (this might take several minutes)", keeper_path); + removeTableNodesFromZooKeeper(my_keeper, drop_lock); + } +} + +std::optional +StorageKafka2::lockTopicPartitions(zkutil::ZooKeeper & keeper_to_use, const TopicPartitions & topic_partitions) +{ + std::vector topic_partition_paths; + topic_partition_paths.reserve(topic_partitions.size()); + for (const auto & topic_partition : topic_partitions) + topic_partition_paths.emplace_back(getTopicPartitionPath(topic_partition)); + + Coordination::Requests ops; + + static constexpr auto ignore_if_exists = true; + + for (const auto & topic_partition_path : topic_partition_paths) + { + const auto lock_file_path = String(topic_partition_path / lock_file_name); + LOG_TRACE(log, "Creating locking ops for: {}", lock_file_path); + ops.push_back(zkutil::makeCreateRequest(topic_partition_path, "", zkutil::CreateMode::Persistent, ignore_if_exists)); + ops.push_back(zkutil::makeCreateRequest(lock_file_path, kafka_settings->kafka_replica_name.value, zkutil::CreateMode::Ephemeral)); + } + Coordination::Responses responses; + + if (const auto code = keeper_to_use.tryMulti(ops, responses); code != Coordination::Error::ZOK) + { + if (code != Coordination::Error::ZNODEEXISTS) + zkutil::KeeperMultiException::check(code, ops, responses); + + // Possible optimization: check the content of lock files, if we locked them, then we can clean them up and retry to lock them. + return std::nullopt; + } + + // We have the locks, let's gather the information we needed + TopicPartitionLocks locks; + { + auto tp_it = topic_partitions.begin(); + auto path_it = topic_partition_paths.begin(); + for (; tp_it != topic_partitions.end(); ++tp_it, ++path_it) + { + using zkutil::EphemeralNodeHolder; + LockedTopicPartitionInfo lock_info{ + EphemeralNodeHolder::existing(*path_it / lock_file_name, keeper_to_use), + getNumber(keeper_to_use, *path_it / commit_file_name), + getNumber(keeper_to_use, *path_it / intent_file_name)}; + + LOG_TRACE( + log, + "Locked topic partition: {}:{} at offset {} with intent size {}", + tp_it->topic, + tp_it->partition_id, + lock_info.committed_offset.value_or(0), + lock_info.intent_size.value_or(0)); + locks.emplace(TopicPartition(*tp_it), std::move(lock_info)); + } + } + + return locks; +} + + +void StorageKafka2::saveCommittedOffset(zkutil::ZooKeeper & keeper_to_use, const TopicPartition & topic_partition) +{ + const auto partition_prefix = getTopicPartitionPath(topic_partition); + keeper_to_use.createOrUpdate(partition_prefix / commit_file_name, toString(topic_partition.offset), zkutil::CreateMode::Persistent); + // This is best effort, if it fails we will try to remove in the next round + keeper_to_use.tryRemove(partition_prefix / intent_file_name, -1); + LOG_TEST( + log, "Saved offset {} for topic-partition [{}:{}]", topic_partition.offset, topic_partition.topic, topic_partition.partition_id); +} + +void StorageKafka2::saveIntent(zkutil::ZooKeeper & keeper_to_use, const TopicPartition & topic_partition, int64_t intent) +{ + LOG_TEST( + log, + "Saving intent of {} for topic-partition [{}:{}] at offset {}", + intent, + topic_partition.topic, + topic_partition.partition_id, + topic_partition.offset); + keeper_to_use.createOrUpdate( + getTopicPartitionPath(topic_partition) / intent_file_name, toString(intent), zkutil::CreateMode::Persistent); +} + + +StorageKafka2::PolledBatchInfo StorageKafka2::pollConsumer( + KafkaConsumer2 & consumer, + const TopicPartition & topic_partition, + std::optional message_count, + Stopwatch & total_stopwatch, + const ContextPtr & modified_context) +{ + LOG_TEST(log, "Polling consumer"); + PolledBatchInfo batch_info; + auto storage_snapshot = getStorageSnapshot(getInMemoryMetadataPtr(), getContext()); + Block non_virtual_header(storage_snapshot->metadata->getSampleBlockNonMaterialized()); + auto virtual_header = getVirtualsHeader(); + + // now it's one-time usage InputStream + // one block of the needed size (or with desired flush timeout) is formed in one internal iteration + // otherwise external iteration will reuse that and logic will became even more fuzzy + MutableColumns virtual_columns = virtual_header.cloneEmptyColumns(); + + auto put_error_to_stream = kafka_settings->kafka_handle_error_mode == StreamingHandleErrorMode::STREAM; + + EmptyReadBuffer empty_buf; + auto input_format = FormatFactory::instance().getInput( + getFormatName(), empty_buf, non_virtual_header, modified_context, getMaxBlockSize(), std::nullopt, 1); + + std::optional exception_message; + size_t total_rows = 0; + size_t failed_poll_attempts = 0; + + auto on_error = [&](const MutableColumns & result_columns, Exception & e) + { + ProfileEvents::increment(ProfileEvents::KafkaMessagesFailed); + + if (put_error_to_stream) + { + exception_message = e.message(); + for (const auto & column : result_columns) + { + // read_kafka_message could already push some rows to result_columns + // before exception, we need to fix it. + auto cur_rows = column->size(); + if (cur_rows > total_rows) + column->popBack(cur_rows - total_rows); + + // all data columns will get default value in case of error + column->insertDefault(); + } + + return 1; + } + else + { + e.addMessage( + "while parsing Kafka message (topic: {}, partition: {}, offset: {})'", + consumer.currentTopic(), + consumer.currentPartition(), + consumer.currentOffset()); + throw std::move(e); + } + }; + + StreamingFormatExecutor executor(non_virtual_header, input_format, std::move(on_error)); + + + Poco::Timespan max_execution_time = kafka_settings->kafka_flush_interval_ms.changed + ? kafka_settings->kafka_flush_interval_ms + : getContext()->getSettingsRef().stream_flush_interval_ms; + + const auto check_time_limit = [&max_execution_time, &total_stopwatch]() + { + if (max_execution_time != 0) + { + auto elapsed_ns = total_stopwatch.elapsed(); + + if (elapsed_ns > static_cast(max_execution_time.totalMicroseconds()) * 1000) + return false; + } + + return true; + }; + + while (true) + { + size_t new_rows = 0; + exception_message.reset(); + if (auto buf = consumer.consume(topic_partition, message_count)) + { + ProfileEvents::increment(ProfileEvents::KafkaMessagesRead); + new_rows = executor.execute(*buf); + } + + if (new_rows) + { + ProfileEvents::increment(ProfileEvents::KafkaRowsRead, new_rows); + + const auto & header_list = consumer.currentHeaderList(); + + Array headers_names; + Array headers_values; + + if (!header_list.empty()) + { + headers_names.reserve(header_list.size()); + headers_values.reserve(header_list.size()); + for (const auto & header : header_list) + { + headers_names.emplace_back(header.get_name()); + headers_values.emplace_back(static_cast(header.get_value())); + } + } + + for (size_t i = 0; i < new_rows; ++i) + { + virtual_columns[0]->insert(consumer.currentTopic()); + virtual_columns[1]->insert(consumer.currentKey()); + virtual_columns[2]->insert(consumer.currentOffset()); + virtual_columns[3]->insert(consumer.currentPartition()); + + + auto timestamp_raw = consumer.currentTimestamp(); + if (timestamp_raw) + { + auto ts = timestamp_raw->get_timestamp(); + virtual_columns[4]->insert(std::chrono::duration_cast(ts).count()); + virtual_columns[5]->insert( + DecimalField(std::chrono::duration_cast(ts).count(), 3)); + } + else + { + virtual_columns[4]->insertDefault(); + virtual_columns[5]->insertDefault(); + } + virtual_columns[6]->insert(headers_names); + virtual_columns[7]->insert(headers_values); + if (put_error_to_stream) + { + if (exception_message) + { + virtual_columns[8]->insert(consumer.currentPayload()); + virtual_columns[9]->insert(*exception_message); + } + else + { + virtual_columns[8]->insertDefault(); + virtual_columns[9]->insertDefault(); + } + } + } + + total_rows = total_rows + new_rows; + batch_info.last_offset = consumer.currentOffset(); + } + else if (consumer.isStalled()) + { + ++failed_poll_attempts; + } + else + { + // We came here in case of tombstone (or sometimes zero-length) messages, and it is not something abnormal + // TODO: it seems like in case of put_error_to_stream=true we may need to process those differently + // currently we just skip them with note in logs. + LOG_DEBUG( + log, + "Parsing of message (topic: {}, partition: {}, offset: {}) return no rows.", + consumer.currentTopic(), + consumer.currentPartition(), + consumer.currentOffset()); + } + + if (!consumer.hasMorePolledMessages() + && (total_rows >= getMaxBlockSize() || !check_time_limit() || failed_poll_attempts >= MAX_FAILED_POLL_ATTEMPTS + || consumer.needsOffsetUpdate())) + { + LOG_TRACE( + log, + "Stopped collecting message for current batch. There are {} failed polled attempts, {} total rows and consumer needs " + "offset update is {}", + failed_poll_attempts, + total_rows, + consumer.needsOffsetUpdate()); + break; + } + } + + if (total_rows == 0) + return {}; + + /// MATERIALIZED columns can be added here, but I think + // they are not needed here: + // and it's misleading to use them here, + // as columns 'materialized' that way stays 'ephemeral' + // i.e. will not be stored anywhere + // If needed any extra columns can be added using DEFAULT they can be added at MV level if needed. + + auto result_block = non_virtual_header.cloneWithColumns(executor.getResultColumns()); + auto virtual_block = virtual_header.cloneWithColumns(std::move(virtual_columns)); + + for (const auto & column : virtual_block.getColumnsWithTypeAndName()) + result_block.insert(column); + + batch_info.blocks.emplace_back(std::move(result_block)); + return batch_info; +} + +void StorageKafka2::threadFunc(size_t idx) +{ + chassert(idx < tasks.size()); + auto task = tasks[idx]; + std::optional maybe_stall_reason; + try + { + auto table_id = getStorageID(); + // Check if at least one direct dependency is attached + size_t num_views = DatabaseCatalog::instance().getDependentViews(table_id).size(); + if (num_views) + { + auto start_time = std::chrono::steady_clock::now(); + + // Keep streaming as long as there are attached views and streaming is not cancelled + while (!task->stream_cancelled && num_created_consumers > 0) + { + maybe_stall_reason.reset(); + if (!StorageKafkaUtils::checkDependencies(table_id, getContext())) + break; + + LOG_DEBUG(log, "Started streaming to {} attached views", num_views); + + // Exit the loop & reschedule if some stream stalled + if (maybe_stall_reason = streamToViews(idx); maybe_stall_reason.has_value()) + { + LOG_TRACE(log, "Stream stalled."); + break; + } + + auto ts = std::chrono::steady_clock::now(); + auto duration = std::chrono::duration_cast(ts - start_time); + if (duration.count() > KAFKA_MAX_THREAD_WORK_DURATION_MS) + { + LOG_TRACE(log, "Thread work duration limit exceeded. Reschedule."); + break; + } + } + } + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + + if (!task->stream_cancelled) + { + // Keeper related problems should be solved relatively fast, it makes sense wait less time + if (maybe_stall_reason.has_value() + && (*maybe_stall_reason == StallReason::KeeperSessionEnded || *maybe_stall_reason == StallReason::CouldNotAcquireLocks)) + task->holder->scheduleAfter(KAFKA_RESCHEDULE_MS / 10); + else + task->holder->scheduleAfter(KAFKA_RESCHEDULE_MS); + } +} + +std::optional StorageKafka2::streamToViews(size_t idx) +{ + // This function is written assuming that each consumer has their own thread. This means once this is changed, this function should be revisited. + // The return values should be revisited, as stalling all consumers because of a single one stalled is not a good idea. + auto table_id = getStorageID(); + auto table = DatabaseCatalog::instance().getTable(table_id, getContext()); + if (!table) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Engine table {} doesn't exist.", table_id.getNameForLogs()); + + CurrentMetrics::Increment metric_increment{CurrentMetrics::KafkaBackgroundReads}; + ProfileEvents::increment(ProfileEvents::KafkaBackgroundReads); + + auto & consumer_info = consumers[idx]; + consumer_info.watch.restart(); + auto & consumer = consumer_info.consumer; + // In case the initial subscribe in startup failed, let's subscribe now + consumer->subscribeIfNotSubscribedYet(); + + // To keep the consumer alive + const auto wait_for_assignment = consumer_info.locks.empty(); + LOG_TRACE(log, "Polling consumer {} for events", idx); + consumer->pollEvents(); + + if (wait_for_assignment) + { + while (nullptr == consumer->getKafkaAssignment() && consumer_info.watch.elapsedMilliseconds() < MAX_TIME_TO_WAIT_FOR_ASSIGNMENT_MS) + consumer->pollEvents(); + LOG_INFO(log, "Consumer has assignment: {}", nullptr == consumer->getKafkaAssignment()); + } + + try + { + if (consumer->needsOffsetUpdate() || consumer_info.locks.empty()) + { + LOG_TRACE(log, "Consumer needs update offset"); + // First release the locks so let other consumers acquire them ASAP + consumer_info.locks.clear(); + consumer_info.topic_partitions.clear(); + + const auto * current_assignment = consumer->getKafkaAssignment(); + if (current_assignment == nullptr) + { + // The consumer lost its assignment and haven't received a new one. + // By returning true this function reports the current consumer as a "stalled" stream, which + LOG_TRACE(log, "No assignment"); + return StallReason::NoAssignment; + } + consumer_info.consume_from_topic_partition_index = 0; + + if (consumer_info.keeper->expired()) + { + consumer_info.keeper = getZooKeeperAndAssertActive(); + LOG_TEST(log, "Got new zookeeper"); + } + + auto maybe_locks = lockTopicPartitions(*consumer_info.keeper, *current_assignment); + + if (!maybe_locks.has_value()) + { + // We couldn't acquire locks, probably some other consumers are still holding them. + LOG_TRACE(log, "Couldn't acquire locks"); + return StallReason::CouldNotAcquireLocks; + } + + consumer_info.locks = std::move(*maybe_locks); + + consumer_info.topic_partitions.reserve(current_assignment->size()); + for (const auto & topic_partition : *current_assignment) + { + TopicPartition topic_partition_copy{topic_partition}; + if (const auto & maybe_committed_offset = consumer_info.locks.at(topic_partition).committed_offset; + maybe_committed_offset.has_value()) + { + topic_partition_copy.offset = *maybe_committed_offset; + } + // in case no saved offset, we will get the offset from Kafka as a best effort. This is important to not to duplicate message when recreating the table. + + consumer_info.topic_partitions.push_back(std::move(topic_partition_copy)); + } + consumer_info.consumer->updateOffsets(consumer_info.topic_partitions); + } + + if (consumer_info.topic_partitions.empty()) + { + LOG_TRACE(log, "Consumer {} has assignment, but has no partitions, probably because there are more consumers in the consumer group than partitions.", idx); + return StallReason::NoPartitions; + } + LOG_TRACE(log, "Trying to consume from consumer {}", idx); + const auto maybe_rows = streamFromConsumer(consumer_info); + if (maybe_rows.has_value()) + { + const auto milliseconds = consumer_info.watch.elapsedMilliseconds(); + LOG_DEBUG( + log, "Pushing {} rows to {} took {} ms.", formatReadableQuantity(*maybe_rows), table_id.getNameForLogs(), milliseconds); + } + else + { + LOG_DEBUG(log, "Couldn't stream any messages"); + return StallReason::NoMessages; + } + } + catch (const zkutil::KeeperException & e) + { + if (Coordination::isHardwareError(e.code)) + { + LOG_INFO(log, "Cleaning up topic-partitions locks because of exception: {}", e.displayText()); + consumer_info.locks.clear(); + activating_task->schedule(); + return StallReason::KeeperSessionEnded; + } + + throw; + } + return {}; +} + + +std::optional StorageKafka2::streamFromConsumer(ConsumerAndAssignmentInfo & consumer_info) +{ + // Create an INSERT query for streaming data + auto insert = std::make_shared(); + insert->table_id = getStorageID(); + + auto kafka_context = Context::createCopy(getContext()); + kafka_context->makeQueryContext(); + kafka_context->applySettingsChanges(settings_adjustments); + + // Create a stream for each consumer and join them in a union stream + // Only insert into dependent views and expect that input blocks contain virtual columns + InterpreterInsertQuery interpreter( + insert, + kafka_context, + /* allow_materialized */ false, + /* no_squash */ true, + /* no_destination */ true, + /* async_insert */ false); + auto block_io = interpreter.execute(); + + auto & topic_partition = consumer_info.topic_partitions[consumer_info.consume_from_topic_partition_index]; + LOG_TRACE( + log, + "Will fetch {}:{} (consume_from_topic_partition_index is {})", + topic_partition.topic, + topic_partition.partition_id, + consumer_info.consume_from_topic_partition_index); + consumer_info.consume_from_topic_partition_index + = (consumer_info.consume_from_topic_partition_index + 1) % consumer_info.topic_partitions.size(); + + bool needs_offset_reset = true; + SCOPE_EXIT({ + if (!needs_offset_reset) + return; + consumer_info.consumer->updateOffsets(consumer_info.topic_partitions); + }); + auto [blocks, last_read_offset] = pollConsumer( + *consumer_info.consumer, topic_partition, consumer_info.locks[topic_partition].intent_size, consumer_info.watch, kafka_context); + + if (blocks.empty()) + { + LOG_TRACE(log, "Didn't get any messages"); + needs_offset_reset = false; + return std::nullopt; + } + + auto converting_dag = ActionsDAG::makeConvertingActions( + blocks.front().cloneEmpty().getColumnsWithTypeAndName(), + block_io.pipeline.getHeader().getColumnsWithTypeAndName(), + ActionsDAG::MatchColumnsMode::Name); + + auto converting_actions = std::make_shared(std::move(converting_dag)); + + for (auto & block : blocks) + converting_actions->execute(block); + + // We can't cancel during copyData, as it's not aware of commits and other kafka-related stuff. + // It will be cancelled on underlying layer (kafka buffer) + + auto & keeper_to_use = *consumer_info.keeper; + auto & lock_info = consumer_info.locks.at(topic_partition); + lock_info.intent_size = last_read_offset - lock_info.committed_offset.value_or(0); + saveIntent(keeper_to_use, topic_partition, *lock_info.intent_size); + std::atomic_size_t rows = 0; + { + block_io.pipeline.complete(Pipe{std::make_shared(std::move(blocks))}); + + block_io.pipeline.setProgressCallback([&](const Progress & progress) { rows += progress.read_rows.load(); }); + CompletedPipelineExecutor executor(block_io.pipeline); + executor.execute(); + } + lock_info.committed_offset = last_read_offset + 1; + topic_partition.offset = last_read_offset + 1; + saveCommittedOffset(keeper_to_use, topic_partition); + consumer_info.consumer->commit(topic_partition); + lock_info.intent_size.reset(); + needs_offset_reset = false; + + return rows; +} + +void StorageKafka2::setZooKeeper() +{ + std::unique_lock lock{keeper_mutex}; + keeper = getContext()->getZooKeeper(); +} + +zkutil::ZooKeeperPtr StorageKafka2::tryGetZooKeeper() const +{ + std::unique_lock lock{keeper_mutex}; + return keeper; +} + +zkutil::ZooKeeperPtr StorageKafka2::getZooKeeper() const +{ + auto res = tryGetZooKeeper(); + if (!res) + throw Exception(ErrorCodes::NO_ZOOKEEPER, "Cannot get ZooKeeper"); + return res; +} + +zkutil::ZooKeeperPtr StorageKafka2::getZooKeeperAndAssertActive() const +{ + auto res = getZooKeeper(); + assertActive(); + return res; +} + +zkutil::ZooKeeperPtr StorageKafka2::getZooKeeperIfTableShutDown() const +{ + zkutil::ZooKeeperPtr new_zookeeper = getContext()->getZooKeeper(); + new_zookeeper->sync(keeper_path); + return new_zookeeper; +} + +fs::path StorageKafka2::getTopicPartitionPath(const TopicPartition & topic_partition) +{ + return fs::path(keeper_path) / "topics" / topic_partition.topic / "partitions" / std::to_string(topic_partition.partition_id); +} + +} diff --git a/src/Storages/Kafka/StorageKafka2.h b/src/Storages/Kafka/StorageKafka2.h new file mode 100644 index 00000000000..f85fedb316a --- /dev/null +++ b/src/Storages/Kafka/StorageKafka2.h @@ -0,0 +1,241 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +namespace cppkafka +{ + +class Configuration; + +} + +namespace DB +{ + +template +struct KafkaInterceptors; + +using KafkaConsumer2Ptr = std::shared_ptr; + +/// Implements a Kafka queue table engine that can be used as a persistent queue / buffer, +/// or as a basic building block for creating pipelines with a continuous insertion / ETL. +/// +/// It is similar to the already existing StorageKafka, it instead of storing the offsets +/// in Kafka, its main source of information about offsets is Keeper. On top of the +/// offsets, it also stores the number of messages (intent size) it tried to insert from +/// each topic. By storing the intent sizes it is possible to retry the same batch of +/// messages in case of any errors and giving deduplication a chance to deduplicate +/// blocks. +/// +/// To not complicate things too much, the current implementation makes sure to fetch +/// messages only from a single topic-partition on a single thread at a time by +/// manipulating the queues of librdkafka. By pulling from multiple topic-partitions +/// the order of messages are not guaranteed, therefore they would have different +/// hashes for deduplication. +class StorageKafka2 final : public IStorage, WithContext +{ + using KafkaInterceptors = KafkaInterceptors; + friend KafkaInterceptors; + +public: + StorageKafka2( + const StorageID & table_id_, + ContextPtr context_, + const ColumnsDescription & columns_, + const String & comment, + std::unique_ptr kafka_settings_, + const String & collection_name_); + + std::string getName() const override { return "Kafka"; } + + bool noPushingToViews() const override { return true; } + + void startup() override; + void shutdown(bool is_drop) override; + + void drop() override; + + Pipe read( + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + size_t num_streams) override; + + SinkToStoragePtr + write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context, bool async_insert) override; + + /// We want to control the number of rows in a chunk inserted into Kafka + bool prefersLargeBlocks() const override { return false; } + + const auto & getFormatName() const { return format_name; } + + StreamingHandleErrorMode getHandleKafkaErrorMode() const { return kafka_settings->kafka_handle_error_mode; } + +private: + using TopicPartition = KafkaConsumer2::TopicPartition; + using TopicPartitions = KafkaConsumer2::TopicPartitions; + + struct LockedTopicPartitionInfo + { + zkutil::EphemeralNodeHolderPtr lock; + std::optional committed_offset; + std::optional intent_size; + }; + + using TopicPartitionLocks = std::unordered_map< + TopicPartition, + LockedTopicPartitionInfo, + KafkaConsumer2::OnlyTopicNameAndPartitionIdHash, + KafkaConsumer2::OnlyTopicNameAndPartitionIdEquality>; + + struct ConsumerAndAssignmentInfo + { + KafkaConsumer2Ptr consumer; + size_t consume_from_topic_partition_index{0}; + TopicPartitions topic_partitions{}; + zkutil::ZooKeeperPtr keeper; + TopicPartitionLocks locks{}; + Stopwatch watch{CLOCK_MONOTONIC_COARSE}; + }; + + struct PolledBatchInfo + { + BlocksList blocks; + int64_t last_offset; + }; + + // Stream thread + struct TaskContext + { + BackgroundSchedulePool::TaskHolder holder; + std::atomic stream_cancelled{false}; + explicit TaskContext(BackgroundSchedulePool::TaskHolder && task_) : holder(std::move(task_)) { } + }; + + enum class AssignmentChange + { + NotChanged, + Updated, + Lost + }; + + // Configuration and state + mutable std::mutex keeper_mutex; + zkutil::ZooKeeperPtr keeper; + String keeper_path; + String replica_path; + std::unique_ptr kafka_settings; + Macros::MacroExpansionInfo macros_info; + const Names topics; + const String brokers; + const String group; + const String client_id; + const String format_name; + const size_t max_rows_per_message; + const String schema_name; + const size_t num_consumers; /// total number of consumers + LoggerPtr log; + Poco::Semaphore semaphore; + const SettingsChanges settings_adjustments; + /// Can differ from num_consumers in case of exception in startup() (or if startup() hasn't been called). + /// In this case we still need to be able to shutdown() properly. + size_t num_created_consumers = 0; /// number of actually created consumers. + std::vector consumers; + std::vector> tasks; + bool thread_per_consumer = false; + /// For memory accounting in the librdkafka threads. + std::mutex thread_statuses_mutex; + std::list> thread_statuses; + /// If named_collection is specified. + String collection_name; + std::atomic shutdown_called = false; + + // Handling replica activation. + std::atomic is_active = false; + zkutil::EphemeralNodeHolderPtr replica_is_active_node; + BackgroundSchedulePool::TaskHolder activating_task; + String active_node_identifier; + UInt64 consecutive_activate_failures = 0; + bool activate(); + void activateAndReschedule(); + void partialShutdown(); + + void assertActive() const; + KafkaConsumer2Ptr createConsumer(size_t consumer_number); + // Returns full consumer related configuration, also the configuration + // contains global kafka properties. + cppkafka::Configuration getConsumerConfiguration(size_t consumer_number); + // Returns full producer related configuration, also the configuration + // contains global kafka properties. + cppkafka::Configuration getProducerConfiguration(); + + void threadFunc(size_t idx); + + size_t getPollMaxBatchSize() const; + size_t getMaxBlockSize() const; + size_t getPollTimeoutMillisecond() const; + + enum class StallReason + { + NoAssignment, + CouldNotAcquireLocks, + NoPartitions, + NoMessages, + KeeperSessionEnded, + }; + + std::optional streamToViews(size_t idx); + + std::optional streamFromConsumer(ConsumerAndAssignmentInfo & consumer_info); + + // Returns true if this is the first replica + bool createTableIfNotExists(); + // Returns true if all of the nodes were cleaned up + bool removeTableNodesFromZooKeeper(zkutil::ZooKeeperPtr keeper_to_use, const zkutil::EphemeralNodeHolder::Ptr & drop_lock); + // Creates only the replica in ZooKeeper. Shouldn't be called on the first replica as it is created in createTableIfNotExists + void createReplica(); + void dropReplica(); + + // Takes lock over topic partitions and sets the committed offset in topic_partitions. + std::optional lockTopicPartitions(zkutil::ZooKeeper & keeper_to_use, const TopicPartitions & topic_partitions); + void saveCommittedOffset(zkutil::ZooKeeper & keeper_to_use, const TopicPartition & topic_partition); + void saveIntent(zkutil::ZooKeeper & keeper_to_use, const TopicPartition & topic_partition, int64_t intent); + + PolledBatchInfo pollConsumer( + KafkaConsumer2 & consumer, + const TopicPartition & topic_partition, + std::optional message_count, + Stopwatch & watch, + const ContextPtr & context); + + void setZooKeeper(); + zkutil::ZooKeeperPtr tryGetZooKeeper() const; + zkutil::ZooKeeperPtr getZooKeeper() const; + zkutil::ZooKeeperPtr getZooKeeperAndAssertActive() const; + zkutil::ZooKeeperPtr getZooKeeperIfTableShutDown() const; + + + std::filesystem::path getTopicPartitionPath(const TopicPartition & topic_partition); +}; + +} diff --git a/src/Storages/Kafka/StorageKafkaUtils.cpp b/src/Storages/Kafka/StorageKafkaUtils.cpp new file mode 100644 index 00000000000..cdc32d775eb --- /dev/null +++ b/src/Storages/Kafka/StorageKafkaUtils.cpp @@ -0,0 +1,452 @@ +#include + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#if USE_KRB5 +# include +#endif // USE_KRB5 + +namespace ProfileEvents +{ +extern const Event KafkaConsumerErrors; +} + +namespace DB +{ + +using namespace std::chrono_literals; + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int SUPPORT_IS_DISABLED; +} + + +void registerStorageKafka(StorageFactory & factory) +{ + auto creator_fn = [](const StorageFactory::Arguments & args) -> std::shared_ptr + { + ASTs & engine_args = args.engine_args; + size_t args_count = engine_args.size(); + const bool has_settings = args.storage_def->settings; + + auto kafka_settings = std::make_unique(); + String collection_name; + if (auto named_collection = tryGetNamedCollectionWithOverrides(args.engine_args, args.getLocalContext())) + { + for (const auto & setting : kafka_settings->all()) + { + const auto & setting_name = setting.getName(); + if (named_collection->has(setting_name)) + kafka_settings->set(setting_name, named_collection->get(setting_name)); + } + collection_name = assert_cast(args.engine_args[0].get())->name(); + } + + if (has_settings) + { + kafka_settings->loadFromQuery(*args.storage_def); + } + +// Check arguments and settings +#define CHECK_KAFKA_STORAGE_ARGUMENT(ARG_NUM, PAR_NAME, EVAL) \ + /* One of the four required arguments is not specified */ \ + if (args_count < (ARG_NUM) && (ARG_NUM) <= 4 && !kafka_settings->PAR_NAME.changed) \ + { \ + throw Exception( \ + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, \ + "Required parameter '{}' " \ + "for storage Kafka not specified", \ + #PAR_NAME); \ + } \ + if (args_count >= (ARG_NUM)) \ + { \ + /* The same argument is given in two places */ \ + if (has_settings && kafka_settings->PAR_NAME.changed) \ + { \ + throw Exception( \ + ErrorCodes::BAD_ARGUMENTS, \ + "The argument №{} of storage Kafka " \ + "and the parameter '{}' " \ + "in SETTINGS cannot be specified at the same time", \ + #ARG_NUM, \ + #PAR_NAME); \ + } \ + /* move engine args to settings */ \ + else \ + { \ + if constexpr ((EVAL) == 1) \ + { \ + engine_args[(ARG_NUM)-1] = evaluateConstantExpressionAsLiteral(engine_args[(ARG_NUM)-1], args.getLocalContext()); \ + } \ + if constexpr ((EVAL) == 2) \ + { \ + engine_args[(ARG_NUM)-1] \ + = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[(ARG_NUM)-1], args.getLocalContext()); \ + } \ + kafka_settings->PAR_NAME = engine_args[(ARG_NUM)-1]->as().value; \ + } \ + } + + /** Arguments of engine is following: + * - Kafka broker list + * - List of topics + * - Group ID (may be a constant expression with a string result) + * - Message format (string) + * - Row delimiter + * - Schema (optional, if the format supports it) + * - Number of consumers + * - Max block size for background consumption + * - Skip (at least) unreadable messages number + * - Do intermediate commits when the batch consumed and handled + */ + + /* 0 = raw, 1 = evaluateConstantExpressionAsLiteral, 2=evaluateConstantExpressionOrIdentifierAsLiteral */ + /// In case of named collection we already validated the arguments. + if (collection_name.empty()) + { + CHECK_KAFKA_STORAGE_ARGUMENT(1, kafka_broker_list, 0) + CHECK_KAFKA_STORAGE_ARGUMENT(2, kafka_topic_list, 1) + CHECK_KAFKA_STORAGE_ARGUMENT(3, kafka_group_name, 2) + CHECK_KAFKA_STORAGE_ARGUMENT(4, kafka_format, 2) + CHECK_KAFKA_STORAGE_ARGUMENT(5, kafka_row_delimiter, 2) + CHECK_KAFKA_STORAGE_ARGUMENT(6, kafka_schema, 2) + CHECK_KAFKA_STORAGE_ARGUMENT(7, kafka_num_consumers, 0) + CHECK_KAFKA_STORAGE_ARGUMENT(8, kafka_max_block_size, 0) + CHECK_KAFKA_STORAGE_ARGUMENT(9, kafka_skip_broken_messages, 0) + CHECK_KAFKA_STORAGE_ARGUMENT(10, kafka_commit_every_batch, 0) + CHECK_KAFKA_STORAGE_ARGUMENT(11, kafka_client_id, 2) + CHECK_KAFKA_STORAGE_ARGUMENT(12, kafka_poll_timeout_ms, 0) + CHECK_KAFKA_STORAGE_ARGUMENT(13, kafka_flush_interval_ms, 0) + CHECK_KAFKA_STORAGE_ARGUMENT(14, kafka_thread_per_consumer, 0) + CHECK_KAFKA_STORAGE_ARGUMENT(15, kafka_handle_error_mode, 0) + CHECK_KAFKA_STORAGE_ARGUMENT(16, kafka_commit_on_select, 0) + CHECK_KAFKA_STORAGE_ARGUMENT(17, kafka_max_rows_per_message, 0) + } + +#undef CHECK_KAFKA_STORAGE_ARGUMENT + + auto num_consumers = kafka_settings->kafka_num_consumers.value; + auto max_consumers = std::max(getNumberOfPhysicalCPUCores(), 16); + + if (!args.getLocalContext()->getSettingsRef().kafka_disable_num_consumers_limit && num_consumers > max_consumers) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "The number of consumers can not be bigger than {}. " + "A single consumer can read any number of partitions. " + "Extra consumers are relatively expensive, " + "and using a lot of them can lead to high memory and CPU usage. " + "To achieve better performance " + "of getting data from Kafka, consider using a setting kafka_thread_per_consumer=1, " + "and ensure you have enough threads " + "in MessageBrokerSchedulePool (background_message_broker_schedule_pool_size). " + "See also https://clickhouse.com/docs/integrations/kafka/kafka-table-engine#tuning-performance", + max_consumers); + } + else if (num_consumers < 1) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Number of consumers can not be lower than 1"); + } + + if (kafka_settings->kafka_max_block_size.changed && kafka_settings->kafka_max_block_size.value < 1) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "kafka_max_block_size can not be lower than 1"); + } + + if (kafka_settings->kafka_poll_max_batch_size.changed && kafka_settings->kafka_poll_max_batch_size.value < 1) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "kafka_poll_max_batch_size can not be lower than 1"); + } + NamesAndTypesList supported_columns; + for (const auto & column : args.columns) + { + if (column.default_desc.kind == ColumnDefaultKind::Alias) + supported_columns.emplace_back(column.name, column.type); + if (column.default_desc.kind == ColumnDefaultKind::Default && !column.default_desc.expression) + supported_columns.emplace_back(column.name, column.type); + } + // Kafka engine allows only ordinary columns without default expression or alias columns. + if (args.columns.getAll() != supported_columns) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "KafkaEngine doesn't support DEFAULT/MATERIALIZED/EPHEMERAL expressions for columns. " + "See https://clickhouse.com/docs/en/engines/table-engines/integrations/kafka/#configuration"); + } + + const auto has_keeper_path = kafka_settings->kafka_keeper_path.changed && !kafka_settings->kafka_keeper_path.value.empty(); + const auto has_replica_name = kafka_settings->kafka_replica_name.changed && !kafka_settings->kafka_replica_name.value.empty(); + + if (!has_keeper_path && !has_replica_name) + return std::make_shared( + args.table_id, args.getContext(), args.columns, args.comment, std::move(kafka_settings), collection_name); + + if (!args.getLocalContext()->getSettingsRef().allow_experimental_kafka_offsets_storage_in_keeper && !args.query.attach) + throw Exception( + ErrorCodes::SUPPORT_IS_DISABLED, + "Storing the Kafka offsets in Keeper is experimental. Set `allow_experimental_kafka_offsets_storage_in_keeper` setting " + "to enable it"); + + if (!has_keeper_path || !has_replica_name) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "Either specify both zookeeper path and replica name or none of them"); + + const auto is_on_cluster = args.getLocalContext()->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY; + const auto is_replicated_database = args.getLocalContext()->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY + && DatabaseCatalog::instance().getDatabase(args.table_id.database_name)->getEngineName() == "Replicated"; + + // UUID macro is only allowed: + // - with Atomic database only with ON CLUSTER queries, otherwise it is easy to misuse: each replica would have separate uuid generated. + // - with Replicated database + // - with attach queries, as those are used on server startup + const auto allow_uuid_macro = is_on_cluster || is_replicated_database || args.query.attach; + + auto context = args.getContext(); + // Unfold {database} and {table} macro on table creation, so table can be renamed. + if (args.mode < LoadingStrictnessLevel::ATTACH) + { + Macros::MacroExpansionInfo info; + /// NOTE: it's not recursive + info.expand_special_macros_only = true; + info.table_id = args.table_id; + // We could probably unfold UUID here too, but let's keep it similar to ReplicatedMergeTree, which doesn't do the unfolding. + info.table_id.uuid = UUIDHelpers::Nil; + kafka_settings->kafka_keeper_path.value = context->getMacros()->expand(kafka_settings->kafka_keeper_path.value, info); + + info.level = 0; + kafka_settings->kafka_replica_name.value = context->getMacros()->expand(kafka_settings->kafka_replica_name.value, info); + } + + + auto * settings_query = args.storage_def->settings; + chassert(has_settings && "Unexpected settings query in StorageKafka"); + + settings_query->changes.setSetting("kafka_keeper_path", kafka_settings->kafka_keeper_path.value); + settings_query->changes.setSetting("kafka_replica_name", kafka_settings->kafka_replica_name.value); + + // Expand other macros (such as {replica}). We do not expand them on previous step to make possible copying metadata files between replicas. + // Disable expanding {shard} macro, because it can lead to incorrect behavior and it doesn't make sense to shard Kafka tables. + Macros::MacroExpansionInfo info; + info.table_id = args.table_id; + if (is_replicated_database) + { + auto database = DatabaseCatalog::instance().getDatabase(args.table_id.database_name); + info.shard.reset(); + info.replica = getReplicatedDatabaseReplicaName(database); + } + if (!allow_uuid_macro) + info.table_id.uuid = UUIDHelpers::Nil; + kafka_settings->kafka_keeper_path.value = context->getMacros()->expand(kafka_settings->kafka_keeper_path.value, info); + + info.level = 0; + info.table_id.uuid = UUIDHelpers::Nil; + kafka_settings->kafka_replica_name.value = context->getMacros()->expand(kafka_settings->kafka_replica_name.value, info); + + return std::make_shared( + args.table_id, args.getContext(), args.columns, args.comment, std::move(kafka_settings), collection_name); + }; + + factory.registerStorage( + "Kafka", + creator_fn, + StorageFactory::StorageFeatures{ + .supports_settings = true, + }); +} + +namespace StorageKafkaUtils +{ +Names parseTopics(String topic_list) +{ + Names result; + boost::split(result, topic_list, [](char c) { return c == ','; }); + for (String & topic : result) + boost::trim(topic); + return result; +} + +String getDefaultClientId(const StorageID & table_id) +{ + return fmt::format("{}-{}-{}-{}", VERSION_NAME, getFQDNOrHostName(), table_id.database_name, table_id.table_name); +} + +void drainConsumer( + cppkafka::Consumer & consumer, const std::chrono::milliseconds drain_timeout, const LoggerPtr & log, ErrorHandler error_handler) +{ + auto start_time = std::chrono::steady_clock::now(); + cppkafka::Error last_error(RD_KAFKA_RESP_ERR_NO_ERROR); + + while (true) + { + auto msg = consumer.poll(100ms); + if (!msg) + break; + + auto error = msg.get_error(); + + if (error) + { + if (msg.is_eof() || error == last_error) + { + break; + } + else + { + LOG_ERROR(log, "Error during draining: {}", error); + error_handler(error); + } + } + + // i don't stop draining on first error, + // only if it repeats once again sequentially + last_error = error; + + auto ts = std::chrono::steady_clock::now(); + if (std::chrono::duration_cast(ts - start_time) > drain_timeout) + { + LOG_ERROR(log, "Timeout during draining."); + break; + } + } +} + +void eraseMessageErrors(Messages & messages, const LoggerPtr & log, ErrorHandler error_handler) +{ + size_t skipped = std::erase_if( + messages, + [&](auto & message) + { + if (auto error = message.get_error()) + { + ProfileEvents::increment(ProfileEvents::KafkaConsumerErrors); + LOG_ERROR(log, "Consumer error: {}", error); + error_handler(error); + return true; + } + return false; + }); + + if (skipped) + LOG_ERROR(log, "There were {} messages with an error", skipped); +} + +SettingsChanges createSettingsAdjustments(KafkaSettings & kafka_settings, const String & schema_name) +{ + SettingsChanges result; + // Needed for backward compatibility + if (!kafka_settings.input_format_skip_unknown_fields.changed) + { + // Always skip unknown fields regardless of the context (JSON or TSKV) + kafka_settings.input_format_skip_unknown_fields = true; + } + + if (!kafka_settings.input_format_allow_errors_ratio.changed) + { + kafka_settings.input_format_allow_errors_ratio = 0.; + } + + if (!kafka_settings.input_format_allow_errors_num.changed) + { + kafka_settings.input_format_allow_errors_num = kafka_settings.kafka_skip_broken_messages.value; + } + + if (!schema_name.empty()) + result.emplace_back("format_schema", schema_name); + + for (const auto & setting : kafka_settings) + { + const auto & name = setting.getName(); + if (name.find("kafka_") == std::string::npos) + result.emplace_back(name, setting.getValue()); + } + return result; +} + + +bool checkDependencies(const StorageID & table_id, const ContextPtr& context) +{ + // Check if all dependencies are attached + auto view_ids = DatabaseCatalog::instance().getDependentViews(table_id); + if (view_ids.empty()) + return true; + + // Check the dependencies are ready? + for (const auto & view_id : view_ids) + { + auto view = DatabaseCatalog::instance().tryGetTable(view_id, context); + if (!view) + return false; + + // If it materialized view, check it's target table + auto * materialized_view = dynamic_cast(view.get()); + if (materialized_view && !materialized_view->tryGetTargetTable()) + return false; + + // Check all its dependencies + if (!checkDependencies(view_id, context)) + return false; + } + + return true; +} + + +VirtualColumnsDescription createVirtuals(StreamingHandleErrorMode handle_error_mode) +{ + VirtualColumnsDescription desc; + + desc.addEphemeral("_topic", std::make_shared(std::make_shared()), ""); + desc.addEphemeral("_key", std::make_shared(), ""); + desc.addEphemeral("_offset", std::make_shared(), ""); + desc.addEphemeral("_partition", std::make_shared(), ""); + desc.addEphemeral("_timestamp", std::make_shared(std::make_shared()), ""); + desc.addEphemeral("_timestamp_ms", std::make_shared(std::make_shared(3)), ""); + desc.addEphemeral("_headers.name", std::make_shared(std::make_shared()), ""); + desc.addEphemeral("_headers.value", std::make_shared(std::make_shared()), ""); + + if (handle_error_mode == StreamingHandleErrorMode::STREAM) + { + desc.addEphemeral("_raw_message", std::make_shared(), ""); + desc.addEphemeral("_error", std::make_shared(), ""); + } + + return desc; +} +} +} diff --git a/src/Storages/Kafka/StorageKafkaUtils.h b/src/Storages/Kafka/StorageKafkaUtils.h new file mode 100644 index 00000000000..cc956dde78d --- /dev/null +++ b/src/Storages/Kafka/StorageKafkaUtils.h @@ -0,0 +1,61 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Poco +{ +namespace Util +{ + class AbstractConfiguration; +} +} + +namespace DB +{ + +class VirtualColumnsDescription; +struct KafkaSettings; + +namespace StorageKafkaUtils +{ +Names parseTopics(String topic_list); +String getDefaultClientId(const StorageID & table_id); + +using ErrorHandler = std::function; + +void drainConsumer( + cppkafka::Consumer & consumer, + std::chrono::milliseconds drain_timeout, + const LoggerPtr & log, + ErrorHandler error_handler = [](const cppkafka::Error & /*err*/) {}); + +using Messages = std::vector; +void eraseMessageErrors(Messages & messages, const LoggerPtr & log, ErrorHandler error_handler = [](const cppkafka::Error & /*err*/) {}); + +SettingsChanges createSettingsAdjustments(KafkaSettings & kafka_settings, const String & schema_name); + +bool checkDependencies(const StorageID & table_id, const ContextPtr& context); + +VirtualColumnsDescription createVirtuals(StreamingHandleErrorMode handle_error_mode); +} +} + +template <> +struct fmt::formatter : fmt::ostream_formatter +{ +}; +template <> +struct fmt::formatter : fmt::ostream_formatter +{ +}; diff --git a/src/Storages/Kafka/parseSyslogLevel.cpp b/src/Storages/Kafka/parseSyslogLevel.cpp index 43630a5001f..828cffc311b 100644 --- a/src/Storages/Kafka/parseSyslogLevel.cpp +++ b/src/Storages/Kafka/parseSyslogLevel.cpp @@ -1,4 +1,5 @@ -#include "parseSyslogLevel.h" +#include + #include /// Must be in a separate compilation unit due to macros overlaps: diff --git a/src/Storages/MaterializedView/RefreshSet.cpp b/src/Storages/MaterializedView/RefreshSet.cpp index a3ef327dc24..7536f59c1e4 100644 --- a/src/Storages/MaterializedView/RefreshSet.cpp +++ b/src/Storages/MaterializedView/RefreshSet.cpp @@ -9,11 +9,6 @@ namespace CurrentMetrics namespace DB { -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - RefreshSet::Handle::Handle(Handle && other) noexcept { *this = std::move(other); @@ -27,6 +22,7 @@ RefreshSet::Handle & RefreshSet::Handle::operator=(Handle && other) noexcept parent_set = std::exchange(other.parent_set, nullptr); id = std::move(other.id); dependencies = std::move(other.dependencies); + iter = std::move(other.iter); metric_increment = std::move(other.metric_increment); return *this; } @@ -39,21 +35,21 @@ RefreshSet::Handle::~Handle() void RefreshSet::Handle::rename(StorageID new_id) { std::lock_guard lock(parent_set->mutex); - parent_set->removeDependenciesLocked(id, dependencies); - auto it = parent_set->tasks.find(id); - auto task = it->second; - parent_set->tasks.erase(it); + RefreshTaskHolder task = *iter; + parent_set->removeDependenciesLocked(task, dependencies); + parent_set->removeTaskLocked(id, iter); id = new_id; - parent_set->tasks.emplace(id, task); - parent_set->addDependenciesLocked(id, dependencies); + iter = parent_set->addTaskLocked(id, task); + parent_set->addDependenciesLocked(task, dependencies); } void RefreshSet::Handle::changeDependencies(std::vector deps) { std::lock_guard lock(parent_set->mutex); - parent_set->removeDependenciesLocked(id, dependencies); + RefreshTaskHolder task = *iter; + parent_set->removeDependenciesLocked(task, dependencies); dependencies = std::move(deps); - parent_set->addDependenciesLocked(id, dependencies); + parent_set->addDependenciesLocked(task, dependencies); } void RefreshSet::Handle::reset() @@ -63,8 +59,8 @@ void RefreshSet::Handle::reset() { std::lock_guard lock(parent_set->mutex); - parent_set->removeDependenciesLocked(id, dependencies); - parent_set->tasks.erase(id); + parent_set->removeDependenciesLocked(*iter, dependencies); + parent_set->removeTaskLocked(id, iter); } parent_set = nullptr; @@ -76,37 +72,50 @@ RefreshSet::RefreshSet() = default; void RefreshSet::emplace(StorageID id, const std::vector & dependencies, RefreshTaskHolder task) { std::lock_guard guard(mutex); - auto [it, is_inserted] = tasks.emplace(id, task); - if (!is_inserted) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Refresh set entry already exists for table {}", id.getFullTableName()); - addDependenciesLocked(id, dependencies); + const auto iter = addTaskLocked(id, task); + addDependenciesLocked(task, dependencies); - task->setRefreshSetHandleUnlock(Handle(this, id, dependencies)); + task->setRefreshSetHandleUnlock(Handle(this, id, iter, dependencies)); } -void RefreshSet::addDependenciesLocked(const StorageID & id, const std::vector & dependencies) +RefreshTaskList::iterator RefreshSet::addTaskLocked(StorageID id, RefreshTaskHolder task) +{ + RefreshTaskList & list = tasks[id]; + list.push_back(task); + return std::prev(list.end()); +} + +void RefreshSet::removeTaskLocked(StorageID id, RefreshTaskList::iterator iter) +{ + const auto it = tasks.find(id); + it->second.erase(iter); + if (it->second.empty()) + tasks.erase(it); +} + +void RefreshSet::addDependenciesLocked(RefreshTaskHolder task, const std::vector & dependencies) { for (const StorageID & dep : dependencies) - dependents[dep].insert(id); + dependents[dep].insert(task); } -void RefreshSet::removeDependenciesLocked(const StorageID & id, const std::vector & dependencies) +void RefreshSet::removeDependenciesLocked(RefreshTaskHolder task, const std::vector & dependencies) { for (const StorageID & dep : dependencies) { auto & set = dependents[dep]; - set.erase(id); + set.erase(task); if (set.empty()) dependents.erase(dep); } } -RefreshTaskHolder RefreshSet::getTask(const StorageID & id) const +RefreshTaskList RefreshSet::findTasks(const StorageID & id) const { std::lock_guard lock(mutex); - if (auto task = tasks.find(id); task != tasks.end()) - return task->second; - return nullptr; + if (auto it = tasks.find(id); it != tasks.end()) + return it->second; + return {}; } RefreshSet::InfoContainer RefreshSet::getInfo() const @@ -116,26 +125,23 @@ RefreshSet::InfoContainer RefreshSet::getInfo() const lock.unlock(); InfoContainer res; - for (const auto & [id, task] : tasks_copy) - res.push_back(task->getInfo()); + for (const auto & [id, list] : tasks_copy) + for (const auto & task : list) + res.push_back(task->getInfo()); return res; } std::vector RefreshSet::getDependents(const StorageID & id) const { std::lock_guard lock(mutex); - std::vector res; auto it = dependents.find(id); if (it == dependents.end()) return {}; - for (const StorageID & dep_id : it->second) - if (auto task = tasks.find(dep_id); task != tasks.end()) - res.push_back(task->second); - return res; + return std::vector(it->second.begin(), it->second.end()); } -RefreshSet::Handle::Handle(RefreshSet * parent_set_, StorageID id_, std::vector dependencies_) +RefreshSet::Handle::Handle(RefreshSet * parent_set_, StorageID id_, RefreshTaskList::iterator iter_, std::vector dependencies_) : parent_set(parent_set_), id(std::move(id_)), dependencies(std::move(dependencies_)) - , metric_increment(CurrentMetrics::Increment(CurrentMetrics::RefreshableViews)) {} + , iter(iter_), metric_increment(CurrentMetrics::Increment(CurrentMetrics::RefreshableViews)) {} } diff --git a/src/Storages/MaterializedView/RefreshSet.h b/src/Storages/MaterializedView/RefreshSet.h index eff445023a6..6141a69996a 100644 --- a/src/Storages/MaterializedView/RefreshSet.h +++ b/src/Storages/MaterializedView/RefreshSet.h @@ -5,13 +5,12 @@ #include #include #include +#include namespace DB { -using DatabaseAndTableNameSet = std::unordered_set; - -enum class RefreshState : RefreshTaskStateUnderlying +enum class RefreshState { Disabled = 0, Scheduled, @@ -19,11 +18,11 @@ enum class RefreshState : RefreshTaskStateUnderlying Running, }; -enum class LastRefreshResult : RefreshTaskStateUnderlying +enum class LastRefreshResult { Unknown = 0, Cancelled, - Exception, + Error, Finished }; @@ -37,7 +36,8 @@ struct RefreshInfo UInt64 last_attempt_duration_ms = 0; UInt32 next_refresh_time = 0; UInt64 refresh_count = 0; - String exception_message; // if last_refresh_result is Exception + UInt64 retry = 0; + String exception_message; // if last_refresh_result is Error std::vector remaining_dependencies; ProgressValues progress; }; @@ -46,8 +46,7 @@ struct RefreshInfo class RefreshSet { public: - /// RAII thing that unregisters a task and its dependencies in destructor. - /// Storage IDs must be unique. Not thread safe. + /// RAII thing that unregisters a task and its dependencies in destructor. Not thread safe. class Handle { friend class RefreshSet; @@ -73,9 +72,10 @@ public: RefreshSet * parent_set = nullptr; StorageID id = StorageID::createEmpty(); std::vector dependencies; + RefreshTaskList::iterator iter; // in parent_set->tasks[id] std::optional metric_increment; - Handle(RefreshSet * parent_set_, StorageID id_, std::vector dependencies_); + Handle(RefreshSet * parent_set_, StorageID id_, RefreshTaskList::iterator iter_, std::vector dependencies_); }; using InfoContainer = std::vector; @@ -84,7 +84,9 @@ public: void emplace(StorageID id, const std::vector & dependencies, RefreshTaskHolder task); - RefreshTaskHolder getTask(const StorageID & id) const; + /// Finds active refreshable view(s) by database and table name. + /// Normally there's at most one, but we allow name collisions here, just in case. + RefreshTaskList findTasks(const StorageID & id) const; InfoContainer getInfo() const; @@ -92,8 +94,8 @@ public: std::vector getDependents(const StorageID & id) const; private: - using TaskMap = std::unordered_map; - using DependentsMap = std::unordered_map; + using TaskMap = std::unordered_map; + using DependentsMap = std::unordered_map, StorageID::DatabaseAndTableNameHash, StorageID::DatabaseAndTableNameEqual>; /// Protects the two maps below, not locked for any nontrivial operations (e.g. operations that /// block or lock other mutexes). @@ -102,8 +104,10 @@ private: TaskMap tasks; DependentsMap dependents; - void addDependenciesLocked(const StorageID & id, const std::vector & dependencies); - void removeDependenciesLocked(const StorageID & id, const std::vector & dependencies); + RefreshTaskList::iterator addTaskLocked(StorageID id, RefreshTaskHolder task); + void removeTaskLocked(StorageID id, RefreshTaskList::iterator iter); + void addDependenciesLocked(RefreshTaskHolder task, const std::vector & dependencies); + void removeDependenciesLocked(RefreshTaskHolder task, const std::vector & dependencies); }; } diff --git a/src/Storages/MaterializedView/RefreshSettings.h b/src/Storages/MaterializedView/RefreshSettings.h index 814c7e52b32..23676538788 100644 --- a/src/Storages/MaterializedView/RefreshSettings.h +++ b/src/Storages/MaterializedView/RefreshSettings.h @@ -6,8 +6,10 @@ namespace DB { #define LIST_OF_REFRESH_SETTINGS(M, ALIAS) \ - /// TODO: Add settings - /// M(UInt64, name, 42, "...", 0) + M(Int64, refresh_retries, 0, "How many times to retry refresh query if it fails. If all attempts fail, wait for the next refresh time according to schedule. 0 to disable retries. -1 for infinite retries.", 0) \ + M(UInt64, refresh_retry_initial_backoff_ms, 100, "Delay before the first retry if refresh query fails (if refresh_retries setting is not zero). Each subsequent retry doubles the delay, up to refresh_retry_max_backoff_ms.", 0) \ + M(UInt64, refresh_retry_max_backoff_ms, 60'000, "Limit on the exponential growth of delay between refresh attempts, if they keep failing and refresh_retries is positive.", 0) \ + DECLARE_SETTINGS_TRAITS(RefreshSettingsTraits, LIST_OF_REFRESH_SETTINGS) diff --git a/src/Storages/MaterializedView/RefreshTask.cpp b/src/Storages/MaterializedView/RefreshTask.cpp index aa8f51d5295..ed5a6652288 100644 --- a/src/Storages/MaterializedView/RefreshTask.cpp +++ b/src/Storages/MaterializedView/RefreshTask.cpp @@ -1,7 +1,5 @@ #include -#include - #include #include #include @@ -11,6 +9,7 @@ #include #include #include +#include namespace CurrentMetrics { @@ -24,43 +23,42 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int QUERY_WAS_CANCELLED; + extern const int REFRESH_FAILED; } RefreshTask::RefreshTask( - const ASTRefreshStrategy & strategy) + StorageMaterializedView * view_, const DB::ASTRefreshStrategy & strategy) : log(getLogger("RefreshTask")) + , view(view_) , refresh_schedule(strategy) -{} + , refresh_append(strategy.append) +{ + if (strategy.settings != nullptr) + refresh_settings.applyChanges(strategy.settings->changes); +} -RefreshTaskHolder RefreshTask::create( - const StorageMaterializedView & view, +OwnedRefreshTask RefreshTask::create( + StorageMaterializedView * view, ContextMutablePtr context, const DB::ASTRefreshStrategy & strategy) { - auto task = std::make_shared(strategy); + auto task = std::make_shared(view, strategy); - task->refresh_task = context->getSchedulePool().createTask("MaterializedViewRefresherTask", - [self = task->weak_from_this()] - { - if (auto t = self.lock()) - t->refreshTask(); - }); + task->refresh_task = context->getSchedulePool().createTask("RefreshTask", + [self = task.get()] { self->refreshTask(); }); - std::vector deps; if (strategy.dependencies) for (auto && dependency : strategy.dependencies->children) - deps.emplace_back(dependency->as()); + task->initial_dependencies.emplace_back(dependency->as()); - context->getRefreshSet().emplace(view.getStorageID(), deps, task); - - return task; + return OwnedRefreshTask(task); } -void RefreshTask::initializeAndStart(std::shared_ptr view) +void RefreshTask::initializeAndStart() { - view_to_refresh = view; if (view->getContext()->getSettingsRef().stop_refreshable_materialized_views_on_startup) stop_requested = true; + view->getContext()->getRefreshSet().emplace(view->getStorageID(), initial_dependencies, shared_from_this()); populateDependencies(); advanceNextRefreshTime(currentTime()); refresh_task->schedule(); @@ -69,7 +67,8 @@ void RefreshTask::initializeAndStart(std::shared_ptr vi void RefreshTask::rename(StorageID new_id) { std::lock_guard guard(mutex); - set_handle.rename(new_id); + if (set_handle) + set_handle.rename(new_id); } void RefreshTask::alterRefreshParams(const DB::ASTRefreshStrategy & new_strategy) @@ -104,7 +103,11 @@ void RefreshTask::alterRefreshParams(const DB::ASTRefreshStrategy & new_strategy if (arriveDependency(id) && !std::exchange(refresh_immediately, true)) refresh_task->schedule(); - /// TODO: Update settings once we have them. + refresh_settings = {}; + if (new_strategy.settings != nullptr) + refresh_settings.applyChanges(new_strategy.settings->changes); + + refresh_append = new_strategy.append; } RefreshInfo RefreshTask::getInfo() const @@ -113,7 +116,7 @@ RefreshInfo RefreshTask::getInfo() const auto res = info; res.view_id = set_handle.getID(); res.remaining_dependencies.assign(remaining_dependencies.begin(), remaining_dependencies.end()); - if (res.last_refresh_result != LastRefreshResult::Exception) + if (res.last_refresh_result != LastRefreshResult::Error) res.exception_message.clear(); res.progress = progress.getValues(); return res; @@ -141,6 +144,8 @@ void RefreshTask::run() std::lock_guard guard(mutex); if (std::exchange(refresh_immediately, true)) return; + next_refresh_prescribed = std::chrono::floor(currentTime()); + next_refresh_actual = currentTime(); refresh_task->schedule(); } @@ -151,10 +156,22 @@ void RefreshTask::cancel() refresh_task->schedule(); } +void RefreshTask::wait() +{ + std::unique_lock lock(mutex); + refresh_cv.wait(lock, [&] { return info.state != RefreshState::Running && !refresh_immediately; }); + if (info.last_refresh_result == LastRefreshResult::Error) + throw Exception(ErrorCodes::REFRESH_FAILED, "Refresh failed: {}", info.exception_message); +} + void RefreshTask::shutdown() { { std::lock_guard guard(mutex); + + if (view == nullptr) + return; // already shut down + stop_requested = true; interruptExecution(); } @@ -168,6 +185,8 @@ void RefreshTask::shutdown() /// (Also, RefreshSet holds a shared_ptr to us.) std::lock_guard guard(mutex); set_handle.reset(); + + view = nullptr; } void RefreshTask::notify(const StorageID & parent_id, std::chrono::sys_seconds parent_next_prescribed_time) @@ -234,6 +253,7 @@ void RefreshTask::refreshTask() chassert(lock.owns_lock()); interrupt_execution.store(false); + refresh_cv.notify_all(); // we'll assign info.state before unlocking the mutex if (stop_requested) { @@ -245,7 +265,7 @@ void RefreshTask::refreshTask() if (!refresh_immediately) { auto now = currentTime(); - if (now >= next_refresh_with_spread) + if (now >= next_refresh_actual) { if (arriveTime()) refresh_immediately = true; @@ -258,7 +278,7 @@ void RefreshTask::refreshTask() else { size_t delay_ms = std::chrono::duration_cast( - next_refresh_with_spread - now).count(); + next_refresh_actual - now).count(); /// If we're in a test that fakes the clock, poll every 100ms. if (fake_clock.load(std::memory_order_relaxed) != INT64_MIN) @@ -272,19 +292,9 @@ void RefreshTask::refreshTask() /// Perform a refresh. + bool append = refresh_append; refresh_immediately = false; - - auto view = lockView(); - if (!view) - { - /// The view was dropped. This RefreshTask should be destroyed soon too. - /// (Maybe this is unreachable.) - info.state = RefreshState::Disabled; - break; - } - info.state = RefreshState::Running; - CurrentMetrics::Increment metric_inc(CurrentMetrics::RefreshingViews); lock.unlock(); @@ -295,19 +305,13 @@ void RefreshTask::refreshTask() try { - executeRefreshUnlocked(view); + executeRefreshUnlocked(append); refreshed = true; } catch (...) { if (!interrupt_execution.load()) - { - PreformattedMessage message = getCurrentExceptionMessageAndPattern(true); - auto text = message.text; - message.text = fmt::format("Refresh view {} failed: {}", view->getStorageID().getFullTableName(), message.text); - LOG_ERROR(log, message); - exception = text; - } + exception = getCurrentExceptionMessage(true); } lock.lock(); @@ -319,18 +323,18 @@ void RefreshTask::refreshTask() if (exception) { - info.last_refresh_result = LastRefreshResult::Exception; + info.last_refresh_result = LastRefreshResult::Error; info.exception_message = *exception; - - /// TODO: Do a few retries with exponential backoff. - advanceNextRefreshTime(now); + Int64 attempt_number = num_retries + 1; + scheduleRetryOrSkipToNextRefresh(now); + LOG_ERROR(log, "Refresh view {} failed (attempt {}/{}): {}", view->getStorageID().getFullTableName(), attempt_number, refresh_settings.refresh_retries + 1, *exception); } else if (!refreshed) { info.last_refresh_result = LastRefreshResult::Cancelled; /// Make sure we don't just start another refresh immediately. - if (!stop_requested && now >= next_refresh_with_spread) + if (!stop_requested) advanceNextRefreshTime(now); } else @@ -363,17 +367,18 @@ void RefreshTask::refreshTask() } } -void RefreshTask::executeRefreshUnlocked(std::shared_ptr view) +void RefreshTask::executeRefreshUnlocked(bool append) { LOG_DEBUG(log, "Refreshing view {}", view->getStorageID().getFullTableName()); progress.reset(); - /// Create a table. - auto [refresh_context, refresh_query] = view->prepareRefresh(); - - StorageID stale_table = StorageID::createEmpty(); + ContextMutablePtr refresh_context = view->createRefreshContext(); + std::optional table_to_drop; try { + /// Create a table. + auto refresh_query = view->prepareRefresh(append, refresh_context, table_to_drop); + /// Run the query. { CurrentThread::QueryScope query_scope(refresh_context); // create a thread group for the query @@ -431,37 +436,55 @@ void RefreshTask::executeRefreshUnlocked(std::shared_ptrexchangeTargetTable(refresh_query->table_id, refresh_context); + if (!append) + table_to_drop = view->exchangeTargetTable(refresh_query->table_id, refresh_context); } catch (...) { - try - { - InterpreterDropQuery::executeDropQuery( - ASTDropQuery::Kind::Drop, view->getContext(), refresh_context, refresh_query->table_id, /*sync*/ false, /*ignore_sync_setting*/ true); - } - catch (...) - { - tryLogCurrentException(log, "Failed to drop temporary table after a failed refresh"); - /// Let's ignore this and keep going, at risk of accumulating many trash tables if this keeps happening. - } + if (table_to_drop.has_value()) + view->dropTempTable(table_to_drop.value(), refresh_context); throw; } - /// Drop the old table (outside the try-catch so we don't try to drop the other table if this fails). - InterpreterDropQuery::executeDropQuery(ASTDropQuery::Kind::Drop, view->getContext(), refresh_context, stale_table, /*sync*/ true, /*ignore_sync_setting*/ true); + if (table_to_drop.has_value()) + view->dropTempTable(table_to_drop.value(), refresh_context); } void RefreshTask::advanceNextRefreshTime(std::chrono::system_clock::time_point now) { std::chrono::sys_seconds next = refresh_schedule.prescribeNext(next_refresh_prescribed, now); next_refresh_prescribed = next; - next_refresh_with_spread = refresh_schedule.addRandomSpread(next); + next_refresh_actual = refresh_schedule.addRandomSpread(next); - auto secs = std::chrono::floor(next_refresh_with_spread); + num_retries = 0; + info.retry = num_retries; + + auto secs = std::chrono::floor(next_refresh_actual); info.next_refresh_time = UInt32(secs.time_since_epoch().count()); } +void RefreshTask::scheduleRetryOrSkipToNextRefresh(std::chrono::system_clock::time_point now) +{ + if (refresh_settings.refresh_retries >= 0 && num_retries >= refresh_settings.refresh_retries) + { + advanceNextRefreshTime(now); + return; + } + + num_retries += 1; + info.retry = num_retries; + + UInt64 delay_ms; + UInt64 multiplier = UInt64(1) << std::min(num_retries - 1, Int64(62)); + /// Overflow check: a*b <= c iff a <= c/b iff a <= floor(c/b). + if (refresh_settings.refresh_retry_initial_backoff_ms <= refresh_settings.refresh_retry_max_backoff_ms / multiplier) + delay_ms = refresh_settings.refresh_retry_initial_backoff_ms * multiplier; + else + delay_ms = refresh_settings.refresh_retry_max_backoff_ms; + + next_refresh_actual = now + std::chrono::milliseconds(delay_ms); +} + bool RefreshTask::arriveDependency(const StorageID & parent) { remaining_dependencies.erase(parent); @@ -502,11 +525,6 @@ void RefreshTask::interruptExecution() } } -std::shared_ptr RefreshTask::lockView() -{ - return std::static_pointer_cast(view_to_refresh.lock()); -} - std::chrono::system_clock::time_point RefreshTask::currentTime() const { Int64 fake = fake_clock.load(std::memory_order::relaxed); diff --git a/src/Storages/MaterializedView/RefreshTask.h b/src/Storages/MaterializedView/RefreshTask.h index 1f050a97cd9..ad9d949e18e 100644 --- a/src/Storages/MaterializedView/RefreshTask.h +++ b/src/Storages/MaterializedView/RefreshTask.h @@ -17,20 +17,21 @@ class PipelineExecutor; class StorageMaterializedView; class ASTRefreshStrategy; +struct OwnedRefreshTask; class RefreshTask : public std::enable_shared_from_this { public: /// Never call it manually, public for shared_ptr construction only - explicit RefreshTask(const ASTRefreshStrategy & strategy); + RefreshTask(StorageMaterializedView * view_, const ASTRefreshStrategy & strategy); /// The only proper way to construct task - static RefreshTaskHolder create( - const StorageMaterializedView & view, + static OwnedRefreshTask create( + StorageMaterializedView * view, ContextMutablePtr context, const DB::ASTRefreshStrategy & strategy); - void initializeAndStart(std::shared_ptr view); + void initializeAndStart(); // called at most once /// Call when renaming the materialized view. void rename(StorageID new_id); @@ -52,7 +53,14 @@ public: /// Cancel task execution void cancel(); + /// Waits for the currently running refresh attempt to complete. + /// If the refresh fails, throws an exception. + /// If no refresh is running, completes immediately, throwing an exception if previous refresh failed. + void wait(); + /// Permanently disable task scheduling and remove this table from RefreshSet. + /// Ok to call multiple times, but not in parallel. + /// Ok to call even if initializeAndStart() wasn't called or failed. void shutdown(); /// Notify dependent task @@ -66,7 +74,7 @@ public: private: LoggerPtr log = nullptr; - std::weak_ptr view_to_refresh; + StorageMaterializedView * view; /// Protects interrupt_execution and running_executor. /// Can be locked while holding `mutex`. @@ -83,10 +91,14 @@ private: mutable std::mutex mutex; RefreshSchedule refresh_schedule; - RefreshSettings refresh_settings; // TODO: populate, use, update on alter + RefreshSettings refresh_settings; + std::vector initial_dependencies; + bool refresh_append; + RefreshSet::Handle set_handle; /// StorageIDs of our dependencies that we're waiting for. + using DatabaseAndTableNameSet = std::unordered_set; DatabaseAndTableNameSet remaining_dependencies; bool time_arrived = false; @@ -111,7 +123,8 @@ private: /// E.g. for REFRESH EVERY 1 DAY, yesterday's refresh of the dependency shouldn't trigger today's /// refresh of the dependent even if it happened today (e.g. it was slow or had random spread > 1 day). std::chrono::sys_seconds next_refresh_prescribed; - std::chrono::system_clock::time_point next_refresh_with_spread; + std::chrono::system_clock::time_point next_refresh_actual; + Int64 num_retries = 0; /// Calls refreshTask() from background thread. BackgroundSchedulePool::TaskHolder refresh_task; @@ -122,6 +135,7 @@ private: /// Just for observability. RefreshInfo info; Progress progress; + std::condition_variable refresh_cv; // notified when info.state changes /// The main loop of the refresh task. It examines the state, sees what needs to be /// done and does it. If there's nothing to do at the moment, returns; it's then scheduled again, @@ -133,11 +147,14 @@ private: /// Perform an actual refresh: create new table, run INSERT SELECT, exchange tables, drop old table. /// Mutex must be unlocked. Called only from refresh_task. - void executeRefreshUnlocked(std::shared_ptr view); + void executeRefreshUnlocked(bool append); /// Assigns next_refresh_* void advanceNextRefreshTime(std::chrono::system_clock::time_point now); + /// Either advances next_refresh_actual using exponential backoff or does advanceNextRefreshTime(). + void scheduleRetryOrSkipToNextRefresh(std::chrono::system_clock::time_point now); + /// Returns true if all dependencies are fulfilled now. Refills remaining_dependencies in this case. bool arriveDependency(const StorageID & parent); bool arriveTime(); @@ -145,9 +162,24 @@ private: void interruptExecution(); - std::shared_ptr lockView(); - std::chrono::system_clock::time_point currentTime() const; }; +/// Wrapper around shared_ptr, calls shutdown() in destructor. +struct OwnedRefreshTask +{ + RefreshTaskHolder ptr; + + OwnedRefreshTask() = default; + explicit OwnedRefreshTask(RefreshTaskHolder p) : ptr(std::move(p)) {} + OwnedRefreshTask(OwnedRefreshTask &&) = default; + OwnedRefreshTask & operator=(OwnedRefreshTask &&) = default; + + ~OwnedRefreshTask() { if (ptr) ptr->shutdown(); } + + RefreshTask* operator->() const { return ptr.get(); } + RefreshTask& operator*() const { return *ptr; } + explicit operator bool() const { return ptr != nullptr; } +}; + } diff --git a/src/Storages/MaterializedView/RefreshTask_fwd.h b/src/Storages/MaterializedView/RefreshTask_fwd.h index 1f366962eb6..ff17c839dc5 100644 --- a/src/Storages/MaterializedView/RefreshTask_fwd.h +++ b/src/Storages/MaterializedView/RefreshTask_fwd.h @@ -8,8 +8,7 @@ namespace DB class RefreshTask; -using RefreshTaskStateUnderlying = UInt8; using RefreshTaskHolder = std::shared_ptr; -using RefreshTaskObserver = std::weak_ptr; +using RefreshTaskList = std::list; } diff --git a/src/Storages/MergeTree/ApproximateNearestNeighborIndexesCommon.cpp b/src/Storages/MergeTree/ApproximateNearestNeighborIndexesCommon.cpp deleted file mode 100644 index 7354243732c..00000000000 --- a/src/Storages/MergeTree/ApproximateNearestNeighborIndexesCommon.cpp +++ /dev/null @@ -1,507 +0,0 @@ -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; - extern const int INCORRECT_QUERY; -} - -namespace -{ - -template -void extractReferenceVectorFromLiteral(ApproximateNearestNeighborInformation::Embedding & reference_vector, Literal literal) -{ - Float64 float_element_of_reference_vector; - Int64 int_element_of_reference_vector; - - for (const auto & value : literal.value()) - { - if (value.tryGet(float_element_of_reference_vector)) - reference_vector.emplace_back(float_element_of_reference_vector); - else if (value.tryGet(int_element_of_reference_vector)) - reference_vector.emplace_back(static_cast(int_element_of_reference_vector)); - else - throw Exception(ErrorCodes::INCORRECT_QUERY, "Wrong type of elements in reference vector. Only float or int are supported."); - } -} - -ApproximateNearestNeighborInformation::Metric stringToMetric(std::string_view metric) -{ - if (metric == "L2Distance") - return ApproximateNearestNeighborInformation::Metric::L2; - else if (metric == "LpDistance") - return ApproximateNearestNeighborInformation::Metric::Lp; - else - return ApproximateNearestNeighborInformation::Metric::Unknown; -} - -} - -ApproximateNearestNeighborCondition::ApproximateNearestNeighborCondition(const SelectQueryInfo & query_info, ContextPtr context) - : block_with_constants(KeyCondition::getBlockWithConstants(query_info.query, query_info.syntax_analyzer_result, context)) - , index_granularity(context->getMergeTreeSettings().index_granularity) - , max_limit_for_ann_queries(context->getSettingsRef().max_limit_for_ann_queries) - , index_is_useful(checkQueryStructure(query_info)) -{} - -bool ApproximateNearestNeighborCondition::alwaysUnknownOrTrue(String metric) const -{ - if (!index_is_useful) - return true; // Query isn't supported - // If query is supported, check metrics for match - return !(stringToMetric(metric) == query_information->metric); -} - -float ApproximateNearestNeighborCondition::getComparisonDistanceForWhereQuery() const -{ - if (index_is_useful && query_information.has_value() - && query_information->type == ApproximateNearestNeighborInformation::Type::Where) - return query_information->distance; - throw Exception(ErrorCodes::LOGICAL_ERROR, "Not supported method for this query type"); -} - -UInt64 ApproximateNearestNeighborCondition::getLimit() const -{ - if (index_is_useful && query_information.has_value()) - return query_information->limit; - throw Exception(ErrorCodes::LOGICAL_ERROR, "No LIMIT section in query, not supported"); -} - -std::vector ApproximateNearestNeighborCondition::getReferenceVector() const -{ - if (index_is_useful && query_information.has_value()) - return query_information->reference_vector; - throw Exception(ErrorCodes::LOGICAL_ERROR, "Reference vector was requested for useless or uninitialized index."); -} - -size_t ApproximateNearestNeighborCondition::getDimensions() const -{ - if (index_is_useful && query_information.has_value()) - return query_information->reference_vector.size(); - throw Exception(ErrorCodes::LOGICAL_ERROR, "Number of dimensions was requested for useless or uninitialized index."); -} - -String ApproximateNearestNeighborCondition::getColumnName() const -{ - if (index_is_useful && query_information.has_value()) - return query_information->column_name; - throw Exception(ErrorCodes::LOGICAL_ERROR, "Column name was requested for useless or uninitialized index."); -} - -ApproximateNearestNeighborInformation::Metric ApproximateNearestNeighborCondition::getMetricType() const -{ - if (index_is_useful && query_information.has_value()) - return query_information->metric; - throw Exception(ErrorCodes::LOGICAL_ERROR, "Metric name was requested for useless or uninitialized index."); -} - -float ApproximateNearestNeighborCondition::getPValueForLpDistance() const -{ - if (index_is_useful && query_information.has_value()) - return query_information->p_for_lp_dist; - throw Exception(ErrorCodes::LOGICAL_ERROR, "P from LPDistance was requested for useless or uninitialized index."); -} - -ApproximateNearestNeighborInformation::Type ApproximateNearestNeighborCondition::getQueryType() const -{ - if (index_is_useful && query_information.has_value()) - return query_information->type; - throw Exception(ErrorCodes::LOGICAL_ERROR, "Query type was requested for useless or uninitialized index."); -} - -bool ApproximateNearestNeighborCondition::checkQueryStructure(const SelectQueryInfo & query) -{ - /// RPN-s for different sections of the query - RPN rpn_prewhere_clause; - RPN rpn_where_clause; - RPN rpn_order_by_clause; - RPNElement rpn_limit; - UInt64 limit; - - ApproximateNearestNeighborInformation prewhere_info; - ApproximateNearestNeighborInformation where_info; - ApproximateNearestNeighborInformation order_by_info; - - /// Build rpns for query sections - const auto & select = query.query->as(); - - /// If query has PREWHERE clause - if (select.prewhere()) - traverseAST(select.prewhere(), rpn_prewhere_clause); - - /// If query has WHERE clause - if (select.where()) - traverseAST(select.where(), rpn_where_clause); - - /// If query has LIMIT clause - if (select.limitLength()) - traverseAtomAST(select.limitLength(), rpn_limit); - - if (select.orderBy()) // If query has ORDERBY clause - traverseOrderByAST(select.orderBy(), rpn_order_by_clause); - - /// Reverse RPNs for conveniences during parsing - std::reverse(rpn_prewhere_clause.begin(), rpn_prewhere_clause.end()); - std::reverse(rpn_where_clause.begin(), rpn_where_clause.end()); - std::reverse(rpn_order_by_clause.begin(), rpn_order_by_clause.end()); - - /// Match rpns with supported types and extract information - const bool prewhere_is_valid = matchRPNWhere(rpn_prewhere_clause, prewhere_info); - const bool where_is_valid = matchRPNWhere(rpn_where_clause, where_info); - const bool order_by_is_valid = matchRPNOrderBy(rpn_order_by_clause, order_by_info); - const bool limit_is_valid = matchRPNLimit(rpn_limit, limit); - - /// Query without a LIMIT clause or with a limit greater than a restriction is not supported - if (!limit_is_valid || max_limit_for_ann_queries < limit) - return false; - - /// Search type query in both sections isn't supported - if (prewhere_is_valid && where_is_valid) - return false; - - /// Search type should be in WHERE or PREWHERE clause - if (prewhere_is_valid || where_is_valid) - query_information = std::move(prewhere_is_valid ? prewhere_info : where_info); - - if (order_by_is_valid) - { - /// Query with valid where and order by type is not supported - if (query_information.has_value()) - return false; - - query_information = std::move(order_by_info); - } - - if (query_information) - query_information->limit = limit; - - return query_information.has_value(); -} - -void ApproximateNearestNeighborCondition::traverseAST(const ASTPtr & node, RPN & rpn) -{ - // If the node is ASTFunction, it may have children nodes - if (const auto * func = node->as()) - { - const ASTs & children = func->arguments->children; - // Traverse children nodes - for (const auto& child : children) - traverseAST(child, rpn); - } - - RPNElement element; - /// Get the data behind node - if (!traverseAtomAST(node, element)) - element.function = RPNElement::FUNCTION_UNKNOWN; - - rpn.emplace_back(std::move(element)); -} - -bool ApproximateNearestNeighborCondition::traverseAtomAST(const ASTPtr & node, RPNElement & out) -{ - /// Match Functions - if (const auto * function = node->as()) - { - /// Set the name - out.func_name = function->name; - - if (function->name == "L1Distance" || - function->name == "L2Distance" || - function->name == "LinfDistance" || - function->name == "cosineDistance" || - function->name == "dotProduct" || - function->name == "LpDistance") - out.function = RPNElement::FUNCTION_DISTANCE; - else if (function->name == "tuple") - out.function = RPNElement::FUNCTION_TUPLE; - else if (function->name == "array") - out.function = RPNElement::FUNCTION_ARRAY; - else if (function->name == "less" || - function->name == "greater" || - function->name == "lessOrEquals" || - function->name == "greaterOrEquals") - out.function = RPNElement::FUNCTION_COMPARISON; - else if (function->name == "_CAST") - out.function = RPNElement::FUNCTION_CAST; - else - return false; - - return true; - } - /// Match identifier - else if (const auto * identifier = node->as()) - { - out.function = RPNElement::FUNCTION_IDENTIFIER; - out.identifier.emplace(identifier->name()); - out.func_name = "column identifier"; - - return true; - } - - /// Check if we have constants behind the node - return tryCastToConstType(node, out); -} - -bool ApproximateNearestNeighborCondition::tryCastToConstType(const ASTPtr & node, RPNElement & out) -{ - Field const_value; - DataTypePtr const_type; - - if (KeyCondition::getConstant(node, block_with_constants, const_value, const_type)) - { - /// Check for constant types - if (const_value.getType() == Field::Types::Float64) - { - out.function = RPNElement::FUNCTION_FLOAT_LITERAL; - out.float_literal.emplace(const_value.get()); - out.func_name = "Float literal"; - return true; - } - - if (const_value.getType() == Field::Types::UInt64) - { - out.function = RPNElement::FUNCTION_INT_LITERAL; - out.int_literal.emplace(const_value.get()); - out.func_name = "Int literal"; - return true; - } - - if (const_value.getType() == Field::Types::Int64) - { - out.function = RPNElement::FUNCTION_INT_LITERAL; - out.int_literal.emplace(const_value.get()); - out.func_name = "Int literal"; - return true; - } - - if (const_value.getType() == Field::Types::Tuple) - { - out.function = RPNElement::FUNCTION_LITERAL_TUPLE; - out.tuple_literal = const_value.get(); - out.func_name = "Tuple literal"; - return true; - } - - if (const_value.getType() == Field::Types::Array) - { - out.function = RPNElement::FUNCTION_LITERAL_ARRAY; - out.array_literal = const_value.get(); - out.func_name = "Array literal"; - return true; - } - - if (const_value.getType() == Field::Types::String) - { - out.function = RPNElement::FUNCTION_STRING_LITERAL; - out.func_name = const_value.get(); - return true; - } - } - - return false; -} - -void ApproximateNearestNeighborCondition::traverseOrderByAST(const ASTPtr & node, RPN & rpn) -{ - if (const auto * expr_list = node->as()) - if (const auto * order_by_element = expr_list->children.front()->as()) - traverseAST(order_by_element->children.front(), rpn); -} - -/// Returns true and stores ApproximateNearestNeighborInformation if the query has valid WHERE clause -bool ApproximateNearestNeighborCondition::matchRPNWhere(RPN & rpn, ApproximateNearestNeighborInformation & ann_info) -{ - /// Fill query type field - ann_info.type = ApproximateNearestNeighborInformation::Type::Where; - - /// WHERE section must have at least 5 expressions - /// Operator->Distance(float)->DistanceFunc->Column->Tuple(Array)Func(ReferenceVector(floats)) - if (rpn.size() < 5) - return false; - - auto iter = rpn.begin(); - - /// Query starts from operator less - if (iter->function != RPNElement::FUNCTION_COMPARISON) - return false; - - const bool greater_case = iter->func_name == "greater" || iter->func_name == "greaterOrEquals"; - const bool less_case = iter->func_name == "less" || iter->func_name == "lessOrEquals"; - - ++iter; - - if (less_case) - { - if (iter->function != RPNElement::FUNCTION_FLOAT_LITERAL) - return false; - - ann_info.distance = getFloatOrIntLiteralOrPanic(iter); - if (ann_info.distance < 0) - throw Exception(ErrorCodes::INCORRECT_QUERY, "Distance can't be negative. Got {}", ann_info.distance); - - ++iter; - - } - else if (!greater_case) - return false; - - auto end = rpn.end(); - if (!matchMainParts(iter, end, ann_info)) - return false; - - if (greater_case) - { - if (ann_info.reference_vector.size() < 2) - return false; - ann_info.distance = ann_info.reference_vector.back(); - if (ann_info.distance < 0) - throw Exception(ErrorCodes::INCORRECT_QUERY, "Distance can't be negative. Got {}", ann_info.distance); - ann_info.reference_vector.pop_back(); - } - - /// query is ok - return true; -} - -/// Returns true and stores ANNExpr if the query has valid ORDERBY clause -bool ApproximateNearestNeighborCondition::matchRPNOrderBy(RPN & rpn, ApproximateNearestNeighborInformation & ann_info) -{ - /// Fill query type field - ann_info.type = ApproximateNearestNeighborInformation::Type::OrderBy; - - // ORDER BY clause must have at least 3 expressions - if (rpn.size() < 3) - return false; - - auto iter = rpn.begin(); - auto end = rpn.end(); - - return ApproximateNearestNeighborCondition::matchMainParts(iter, end, ann_info); -} - -/// Returns true and stores Length if we have valid LIMIT clause in query -bool ApproximateNearestNeighborCondition::matchRPNLimit(RPNElement & rpn, UInt64 & limit) -{ - if (rpn.function == RPNElement::FUNCTION_INT_LITERAL) - { - limit = rpn.int_literal.value(); - return true; - } - - return false; -} - -/// Matches dist function, referencer vector, column name -bool ApproximateNearestNeighborCondition::matchMainParts(RPN::iterator & iter, const RPN::iterator & end, ApproximateNearestNeighborInformation & ann_info) -{ - bool identifier_found = false; - - /// Matches DistanceFunc->[Column]->[Tuple(array)Func]->ReferenceVector(floats)->[Column] - if (iter->function != RPNElement::FUNCTION_DISTANCE) - return false; - - ann_info.metric = stringToMetric(iter->func_name); - ++iter; - - if (ann_info.metric == ApproximateNearestNeighborInformation::Metric::Lp) - { - if (iter->function != RPNElement::FUNCTION_FLOAT_LITERAL && - iter->function != RPNElement::FUNCTION_INT_LITERAL) - return false; - ann_info.p_for_lp_dist = getFloatOrIntLiteralOrPanic(iter); - ++iter; - } - - if (iter->function == RPNElement::FUNCTION_IDENTIFIER) - { - identifier_found = true; - ann_info.column_name = std::move(iter->identifier.value()); - ++iter; - } - - if (iter->function == RPNElement::FUNCTION_TUPLE || iter->function == RPNElement::FUNCTION_ARRAY) - ++iter; - - if (iter->function == RPNElement::FUNCTION_LITERAL_TUPLE) - { - extractReferenceVectorFromLiteral(ann_info.reference_vector, iter->tuple_literal); - ++iter; - } - - if (iter->function == RPNElement::FUNCTION_LITERAL_ARRAY) - { - extractReferenceVectorFromLiteral(ann_info.reference_vector, iter->array_literal); - ++iter; - } - - /// further conditions are possible if there is no tuple or array, or no identifier is found - /// the tuple or array can be inside a cast function. For other cases, see the loop after this condition - if (iter != end && iter->function == RPNElement::FUNCTION_CAST) - { - ++iter; - /// Cast should be made to array or tuple - if (!iter->func_name.starts_with("Array") && !iter->func_name.starts_with("Tuple")) - return false; - ++iter; - if (iter->function == RPNElement::FUNCTION_LITERAL_TUPLE) - { - extractReferenceVectorFromLiteral(ann_info.reference_vector, iter->tuple_literal); - ++iter; - } - else if (iter->function == RPNElement::FUNCTION_LITERAL_ARRAY) - { - extractReferenceVectorFromLiteral(ann_info.reference_vector, iter->array_literal); - ++iter; - } - else - return false; - } - - while (iter != end) - { - if (iter->function == RPNElement::FUNCTION_FLOAT_LITERAL || - iter->function == RPNElement::FUNCTION_INT_LITERAL) - ann_info.reference_vector.emplace_back(getFloatOrIntLiteralOrPanic(iter)); - else if (iter->function == RPNElement::FUNCTION_IDENTIFIER) - { - if (identifier_found) - return false; - ann_info.column_name = std::move(iter->identifier.value()); - identifier_found = true; - } - else - return false; - - ++iter; - } - - /// Final checks of correctness - return identifier_found && !ann_info.reference_vector.empty(); -} - -/// Gets float or int from AST node -float ApproximateNearestNeighborCondition::getFloatOrIntLiteralOrPanic(const RPN::iterator& iter) -{ - if (iter->float_literal.has_value()) - return iter->float_literal.value(); - if (iter->int_literal.has_value()) - return static_cast(iter->int_literal.value()); - throw Exception(ErrorCodes::INCORRECT_QUERY, "Wrong parsed AST in buildRPN\n"); -} - -} diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 3a44359b537..195aa4fdc10 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -428,7 +428,7 @@ std::pair IMergeTreeDataPart::getMinMaxDate() const if (storage.minmax_idx_date_column_pos != -1 && minmax_idx->initialized) { const auto & hyperrectangle = minmax_idx->hyperrectangle[storage.minmax_idx_date_column_pos]; - return {DayNum(hyperrectangle.left.get()), DayNum(hyperrectangle.right.get())}; + return {DayNum(hyperrectangle.left.safeGet()), DayNum(hyperrectangle.right.safeGet())}; } else return {}; @@ -444,15 +444,15 @@ std::pair IMergeTreeDataPart::getMinMaxTime() const if (hyperrectangle.left.getType() == Field::Types::UInt64) { assert(hyperrectangle.right.getType() == Field::Types::UInt64); - return {hyperrectangle.left.get(), hyperrectangle.right.get()}; + return {hyperrectangle.left.safeGet(), hyperrectangle.right.safeGet()}; } /// The case of DateTime64 else if (hyperrectangle.left.getType() == Field::Types::Decimal64) { assert(hyperrectangle.right.getType() == Field::Types::Decimal64); - auto left = hyperrectangle.left.get>(); - auto right = hyperrectangle.right.get>(); + auto left = hyperrectangle.left.safeGet>(); + auto right = hyperrectangle.right.safeGet>(); assert(left.getScale() == right.getScale()); @@ -749,8 +749,16 @@ void IMergeTreeDataPart::loadColumnsChecksumsIndexes(bool require_columns_checks /// Probably there is something wrong with files of this part. /// So it can be helpful to add to the error message some information about those files. String files_in_part; + for (auto it = getDataPartStorage().iterate(); it->isValid(); it->next()) - files_in_part += fmt::format("{}{} ({} bytes)", (files_in_part.empty() ? "" : ", "), it->name(), getDataPartStorage().getFileSize(it->name())); + { + std::string file_info; + if (!getDataPartStorage().isDirectory(it->name())) + file_info = fmt::format(" ({} bytes)", getDataPartStorage().getFileSize(it->name())); + + files_in_part += fmt::format("{}{}{}", (files_in_part.empty() ? "" : ", "), it->name(), file_info); + + } if (!files_in_part.empty()) e->addMessage("Part contains files: {}", files_in_part); if (isEmpty()) @@ -1654,11 +1662,9 @@ void IMergeTreeDataPart::loadColumns(bool require) } -/// Project part / part with project parts / compact part doesn't support LWD. bool IMergeTreeDataPart::supportLightweightDeleteMutate() const { - return (part_type == MergeTreeDataPartType::Wide || part_type == MergeTreeDataPartType::Compact) && - parent_part == nullptr && projection_parts.empty(); + return (part_type == MergeTreeDataPartType::Wide || part_type == MergeTreeDataPartType::Compact); } bool IMergeTreeDataPart::hasLightweightDelete() const @@ -2141,7 +2147,27 @@ void IMergeTreeDataPart::checkConsistencyBase() const } } - checksums.checkSizes(getDataPartStorage()); + const auto & data_part_storage = getDataPartStorage(); + for (const auto & [filename, checksum] : checksums.files) + { + try + { + checksum.checkSize(data_part_storage, filename); + } + catch (const Exception & ex) + { + /// For projection parts check will mark them broken in loadProjections + if (!parent_part && filename.ends_with(".proj")) + { + std::string projection_name = fs::path(filename).stem(); + LOG_INFO(storage.log, "Projection {} doesn't exist on start for part {}, marking it as broken", projection_name, name); + if (hasProjection(projection_name)) + markProjectionPartAsBroken(projection_name, ex.message(), ex.code()); + } + else + throw; + } + } } else { diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index dfb43c4e75d..1ed096fae17 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -349,7 +349,7 @@ const KeyCondition::AtomMap KeyCondition::atom_map if (value.getType() != Field::Types::String) return false; - String prefix = extractFixedPrefixFromLikePattern(value.get(), /*requires_perfect_prefix*/ false); + String prefix = extractFixedPrefixFromLikePattern(value.safeGet(), /*requires_perfect_prefix*/ false); if (prefix.empty()) return false; @@ -370,7 +370,7 @@ const KeyCondition::AtomMap KeyCondition::atom_map if (value.getType() != Field::Types::String) return false; - String prefix = extractFixedPrefixFromLikePattern(value.get(), /*requires_perfect_prefix*/ true); + String prefix = extractFixedPrefixFromLikePattern(value.safeGet(), /*requires_perfect_prefix*/ true); if (prefix.empty()) return false; @@ -391,7 +391,7 @@ const KeyCondition::AtomMap KeyCondition::atom_map if (value.getType() != Field::Types::String) return false; - String prefix = value.get(); + String prefix = value.safeGet(); if (prefix.empty()) return false; @@ -412,7 +412,7 @@ const KeyCondition::AtomMap KeyCondition::atom_map if (value.getType() != Field::Types::String) return false; - const String & expression = value.get(); + const String & expression = value.safeGet(); /// This optimization can't process alternation - this would require /// a comprehensive parsing of regular expression. @@ -888,13 +888,22 @@ static Field applyFunctionForField( return (*col)[0]; } +/// applyFunction will execute the function with one `field` or the column which `field` refers to. static FieldRef applyFunction(const FunctionBasePtr & func, const DataTypePtr & current_type, const FieldRef & field) { + chassert(func != nullptr); /// Fallback for fields without block reference. if (field.isExplicit()) return applyFunctionForField(func, current_type, field); - String result_name = "_" + func->getName() + "_" + toString(field.column_idx); + /// We will cache the function result inside `field.columns`, because this function will call many times + /// from many fields from same column. When the column is huge, for example there are thousands of marks, we need a cache. + /// The cache key is like `_[function_pointer]_[param_column_id]` to identify a unique pair. + WriteBufferFromOwnString buf; + writeText("_", buf); + writePointerHex(func.get(), buf); + writeText("_" + toString(field.column_idx), buf); + String result_name = buf.str(); const auto & columns = field.columns; size_t result_idx = columns->size(); @@ -906,6 +915,7 @@ static FieldRef applyFunction(const FunctionBasePtr & func, const DataTypePtr & if (result_idx == columns->size()) { + /// When cache is missed, we calculate the whole column where the field comes from. This will avoid repeated calculation. ColumnsWithTypeAndName args{(*columns)[field.column_idx]}; field.columns->emplace_back(ColumnWithTypeAndName {nullptr, func->getResultType(), result_name}); (*columns)[result_idx].column = func->execute(args, (*columns)[result_idx].type, columns->front().column->size()); @@ -1956,11 +1966,8 @@ bool KeyCondition::extractAtomFromTree(const RPNBuilderTreeNode & node, RPNEleme auto common_type_maybe_nullable = (key_expr_type_is_nullable && !common_type->isNullable()) ? DataTypePtr(std::make_shared(common_type)) : common_type; - ColumnsWithTypeAndName arguments{ - {nullptr, key_expr_type, ""}, - {DataTypeString().createColumnConst(1, common_type_maybe_nullable->getName()), common_type_maybe_nullable, ""}}; - FunctionOverloadResolverPtr func_builder_cast = createInternalCastOverloadResolver(CastType::nonAccurate, {}); - auto func_cast = func_builder_cast->build(arguments); + + auto func_cast = createInternalCast({key_expr_type, {}}, common_type_maybe_nullable, CastType::nonAccurate, {}); /// If we know the given range only contains one value, then we treat all functions as positive monotonic. if (!single_point && !func_cast->hasInformationAboutMonotonicity()) @@ -2931,8 +2938,8 @@ BoolMask KeyCondition::checkInHyperrectangle( /// Let's support only the case of 2d, because I'm not confident in other cases. if (num_dimensions == 2) { - UInt64 left = key_range.left.get(); - UInt64 right = key_range.right.get(); + UInt64 left = key_range.left.safeGet(); + UInt64 right = key_range.right.safeGet(); BoolMask mask(false, true); auto hyperrectangle_intersection_callback = [&](std::array, 2> curve_hyperrectangle) diff --git a/src/Storages/MergeTree/MergeProgress.h b/src/Storages/MergeTree/MergeProgress.h index dd4922051b5..8562e81e761 100644 --- a/src/Storages/MergeTree/MergeProgress.h +++ b/src/Storages/MergeTree/MergeProgress.h @@ -8,10 +8,10 @@ namespace ProfileEvents { - extern const Event MergesTimeMilliseconds; extern const Event MergedUncompressedBytes; extern const Event MergedRows; - extern const Event Merge; + extern const Event MutatedRows; + extern const Event MutatedUncompressedBytes; } namespace DB @@ -63,18 +63,17 @@ public: void updateWatch() { UInt64 watch_curr_elapsed = merge_list_element_ptr->watch.elapsed(); - ProfileEvents::increment(ProfileEvents::MergesTimeMilliseconds, (watch_curr_elapsed - watch_prev_elapsed) / 1000000); watch_prev_elapsed = watch_curr_elapsed; } - void operator() (const Progress & value) + void operator()(const Progress & value) { - ProfileEvents::increment(ProfileEvents::MergedUncompressedBytes, value.read_bytes); - if (stage.is_first) - { - ProfileEvents::increment(ProfileEvents::MergedRows, value.read_rows); - ProfileEvents::increment(ProfileEvents::Merge); - } + if (merge_list_element_ptr->is_mutation) + updateProfileEvents(value, ProfileEvents::MutatedRows, ProfileEvents::MutatedUncompressedBytes); + else + updateProfileEvents(value, ProfileEvents::MergedRows, ProfileEvents::MergedUncompressedBytes); + + updateWatch(); merge_list_element_ptr->bytes_read_uncompressed += value.read_bytes; @@ -90,6 +89,14 @@ public: std::memory_order_relaxed); } } + +private: + void updateProfileEvents(const Progress & value, ProfileEvents::Event rows_event, ProfileEvents::Event bytes_event) const + { + ProfileEvents::increment(bytes_event, value.read_bytes); + if (stage.is_first) + ProfileEvents::increment(rows_event, value.read_rows); + } }; } diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index ce06adf110c..806ed930311 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -39,6 +40,18 @@ #include #include +namespace ProfileEvents +{ + extern const Event Merge; + extern const Event MergedColumns; + extern const Event GatheredColumns; + extern const Event MergeTotalMilliseconds; + extern const Event MergeExecuteMilliseconds; + extern const Event MergeHorizontalStageExecuteMilliseconds; + extern const Event MergeVerticalStageExecuteMilliseconds; + extern const Event MergeProjectionStageExecuteMilliseconds; +} + namespace DB { @@ -62,7 +75,7 @@ static ColumnsStatistics getStatisticsForColumns( const auto * desc = all_columns.tryGet(column.name); if (desc && !desc->statistics.empty()) { - auto statistics = MergeTreeStatisticsFactory::instance().get(desc->statistics); + auto statistics = MergeTreeStatisticsFactory::instance().get(*desc); all_statistics.push_back(std::move(statistics)); } } @@ -169,6 +182,8 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::extractMergingAndGatheringColu bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() { + ProfileEvents::increment(ProfileEvents::Merge); + String local_tmp_prefix; if (global_ctx->need_prefix) { @@ -446,6 +461,13 @@ void MergeTask::addGatheringColumn(GlobalRuntimeContextPtr global_ctx, const Str MergeTask::StageRuntimeContextPtr MergeTask::ExecuteAndFinalizeHorizontalPart::getContextForNextStage() { + /// Do not increment for projection stage because time is already accounted in main task. + if (global_ctx->parent_part == nullptr) + { + ProfileEvents::increment(ProfileEvents::MergeExecuteMilliseconds, ctx->elapsed_execute_ns / 1000000UL); + ProfileEvents::increment(ProfileEvents::MergeHorizontalStageExecuteMilliseconds, ctx->elapsed_execute_ns / 1000000UL); + } + auto new_ctx = std::make_shared(); new_ctx->rows_sources_write_buf = std::move(ctx->rows_sources_write_buf); @@ -463,8 +485,14 @@ MergeTask::StageRuntimeContextPtr MergeTask::ExecuteAndFinalizeHorizontalPart::g MergeTask::StageRuntimeContextPtr MergeTask::VerticalMergeStage::getContextForNextStage() { - auto new_ctx = std::make_shared(); + /// Do not increment for projection stage because time is already accounted in main task. + if (global_ctx->parent_part == nullptr) + { + ProfileEvents::increment(ProfileEvents::MergeExecuteMilliseconds, ctx->elapsed_execute_ns / 1000000UL); + ProfileEvents::increment(ProfileEvents::MergeVerticalStageExecuteMilliseconds, ctx->elapsed_execute_ns / 1000000UL); + } + auto new_ctx = std::make_shared(); new_ctx->need_sync = std::move(ctx->need_sync); ctx.reset(); @@ -474,9 +502,14 @@ MergeTask::StageRuntimeContextPtr MergeTask::VerticalMergeStage::getContextForNe bool MergeTask::ExecuteAndFinalizeHorizontalPart::execute() { - assert(subtasks_iterator != subtasks.end()); - if ((this->**subtasks_iterator)()) - return true; + chassert(subtasks_iterator != subtasks.end()); + + Stopwatch watch; + bool res = (this->**subtasks_iterator)(); + ctx->elapsed_execute_ns += watch.elapsedNanoseconds(); + + if (res) + return res; /// Move to the next subtask in an array of subtasks ++subtasks_iterator; @@ -486,11 +519,20 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::execute() bool MergeTask::ExecuteAndFinalizeHorizontalPart::executeImpl() { - Block block; - if (!ctx->is_cancelled() && (global_ctx->merging_executor->pull(block))) - { - global_ctx->rows_written += block.rows(); + Stopwatch watch(CLOCK_MONOTONIC_COARSE); + UInt64 step_time_ms = global_ctx->data->getSettings()->background_task_preferred_step_execution_time_ms.totalMilliseconds(); + do + { + Block block; + + if (ctx->is_cancelled() || !global_ctx->merging_executor->pull(block)) + { + finalize(); + return false; + } + + global_ctx->rows_written += block.rows(); const_cast(*global_ctx->to).write(block); UInt64 result_rows = 0; @@ -510,11 +552,14 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::executeImpl() global_ctx->space_reservation->update(static_cast((1. - progress) * ctx->initial_reservation)); } + } while (watch.elapsedMilliseconds() < step_time_ms); - /// Need execute again - return true; - } + /// Need execute again + return true; +} +void MergeTask::ExecuteAndFinalizeHorizontalPart::finalize() const +{ global_ctx->merging_executor.reset(); global_ctx->merged_pipeline.reset(); @@ -524,17 +569,13 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::executeImpl() if (ctx->need_remove_expired_values && global_ctx->ttl_merges_blocker->isCancelled()) throw Exception(ErrorCodes::ABORTED, "Cancelled merging parts with expired TTL"); - const auto data_settings = global_ctx->data->getSettings(); const size_t sum_compressed_bytes_upper_bound = global_ctx->merge_list_element_ptr->total_size_bytes_compressed; - ctx->need_sync = needSyncPart(ctx->sum_input_rows_upper_bound, sum_compressed_bytes_upper_bound, *data_settings); - - return false; + ctx->need_sync = needSyncPart(ctx->sum_input_rows_upper_bound, sum_compressed_bytes_upper_bound, *global_ctx->data->getSettings()); } - bool MergeTask::VerticalMergeStage::prepareVerticalMergeForAllColumns() const { - /// No need to execute this part if it is horizontal merge. + /// No need to execute this part if it is horizontal merge. if (global_ctx->chosen_merge_algorithm != MergeAlgorithm::Vertical) return false; @@ -708,17 +749,24 @@ void MergeTask::VerticalMergeStage::prepareVerticalMergeForOneColumn() const bool MergeTask::VerticalMergeStage::executeVerticalMergeForOneColumn() const { - Block block; - if (!global_ctx->merges_blocker->isCancelled() && !global_ctx->merge_list_element_ptr->is_cancelled.load(std::memory_order_relaxed) - && ctx->executor->pull(block)) + Stopwatch watch(CLOCK_MONOTONIC_COARSE); + UInt64 step_time_ms = global_ctx->data->getSettings()->background_task_preferred_step_execution_time_ms.totalMilliseconds(); + + do { + Block block; + + if (global_ctx->merges_blocker->isCancelled() + || global_ctx->merge_list_element_ptr->is_cancelled.load(std::memory_order_relaxed) + || !ctx->executor->pull(block)) + return false; + ctx->column_elems_written += block.rows(); ctx->column_to->write(block); + } while (watch.elapsedMilliseconds() < step_time_ms); - /// Need execute again - return true; - } - return false; + /// Need execute again + return true; } @@ -784,6 +832,9 @@ bool MergeTask::MergeProjectionsStage::mergeMinMaxIndexAndPrepareProjections() c /// Print overall profiling info. NOTE: it may duplicates previous messages { + ProfileEvents::increment(ProfileEvents::MergedColumns, global_ctx->merging_columns.size()); + ProfileEvents::increment(ProfileEvents::GatheredColumns, global_ctx->gathering_columns.size()); + double elapsed_seconds = global_ctx->merge_list_element_ptr->watch.elapsedSeconds(); LOG_DEBUG(ctx->log, "Merge sorted {} rows, containing {} columns ({} merged, {} gathered) in {} sec., {} rows/sec., {}/sec.", @@ -906,12 +957,29 @@ bool MergeTask::MergeProjectionsStage::finalizeProjectionsAndWholeMerge() const return false; } +MergeTask::StageRuntimeContextPtr MergeTask::MergeProjectionsStage::getContextForNextStage() +{ + /// Do not increment for projection stage because time is already accounted in main task. + /// The projection stage has its own empty projection stage which may add a drift of several milliseconds. + if (global_ctx->parent_part == nullptr) + { + ProfileEvents::increment(ProfileEvents::MergeExecuteMilliseconds, ctx->elapsed_execute_ns / 1000000UL); + ProfileEvents::increment(ProfileEvents::MergeProjectionStageExecuteMilliseconds, ctx->elapsed_execute_ns / 1000000UL); + } + + return nullptr; +} bool MergeTask::VerticalMergeStage::execute() { - assert(subtasks_iterator != subtasks.end()); - if ((this->**subtasks_iterator)()) - return true; + chassert(subtasks_iterator != subtasks.end()); + + Stopwatch watch; + bool res = (this->**subtasks_iterator)(); + ctx->elapsed_execute_ns += watch.elapsedNanoseconds(); + + if (res) + return res; /// Move to the next subtask in an array of subtasks ++subtasks_iterator; @@ -920,9 +988,14 @@ bool MergeTask::VerticalMergeStage::execute() bool MergeTask::MergeProjectionsStage::execute() { - assert(subtasks_iterator != subtasks.end()); - if ((this->**subtasks_iterator)()) - return true; + chassert(subtasks_iterator != subtasks.end()); + + Stopwatch watch; + bool res = (this->**subtasks_iterator)(); + ctx->elapsed_execute_ns += watch.elapsedNanoseconds(); + + if (res) + return res; /// Move to the next subtask in an array of subtasks ++subtasks_iterator; @@ -969,12 +1042,26 @@ bool MergeTask::VerticalMergeStage::executeVerticalMergeForAllColumns() const bool MergeTask::execute() { - assert(stages_iterator != stages.end()); - if ((*stages_iterator)->execute()) + chassert(stages_iterator != stages.end()); + const auto & current_stage = *stages_iterator; + + if (current_stage->execute()) return true; - /// Stage is finished, need initialize context for the next stage - auto next_stage_context = (*stages_iterator)->getContextForNextStage(); + /// Stage is finished, need to initialize context for the next stage and update profile events. + + UInt64 current_elapsed_ms = global_ctx->merge_list_element_ptr->watch.elapsedMilliseconds(); + UInt64 stage_elapsed_ms = current_elapsed_ms - global_ctx->prev_elapsed_ms; + global_ctx->prev_elapsed_ms = current_elapsed_ms; + + auto next_stage_context = current_stage->getContextForNextStage(); + + /// Do not increment for projection stage because time is already accounted in main task. + if (global_ctx->parent_part == nullptr) + { + ProfileEvents::increment(current_stage->getTotalTimeProfileEvent(), stage_elapsed_ms); + ProfileEvents::increment(ProfileEvents::MergeTotalMilliseconds, stage_elapsed_ms); + } /// Move to the next stage in an array of stages ++stages_iterator; @@ -1099,7 +1186,6 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() /* limit_= */0, /* always_read_till_end_= */false, ctx->rows_sources_write_buf.get(), - true, ctx->blocks_are_granules_size); break; diff --git a/src/Storages/MergeTree/MergeTask.h b/src/Storages/MergeTree/MergeTask.h index 8b0f2130e8e..9a68b2e04ac 100644 --- a/src/Storages/MergeTree/MergeTask.h +++ b/src/Storages/MergeTree/MergeTask.h @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -26,6 +27,12 @@ #include #include +namespace ProfileEvents +{ + extern const Event MergeHorizontalStageTotalMilliseconds; + extern const Event MergeVerticalStageTotalMilliseconds; + extern const Event MergeProjectionStageTotalMilliseconds; +} namespace DB { @@ -134,6 +141,7 @@ private: { virtual void setRuntimeContext(StageRuntimeContextPtr local, StageRuntimeContextPtr global) = 0; virtual StageRuntimeContextPtr getContextForNextStage() = 0; + virtual ProfileEvents::Event getTotalTimeProfileEvent() const = 0; virtual bool execute() = 0; virtual ~IStage() = default; }; @@ -195,6 +203,7 @@ private: bool need_prefix; scope_guard temporary_directory_lock; + UInt64 prev_elapsed_ms{0}; }; using GlobalRuntimeContextPtr = std::shared_ptr; @@ -233,6 +242,7 @@ private: /// Dependencies for next stages std::list::const_iterator it_name_and_type; bool need_sync{false}; + UInt64 elapsed_execute_ns{0}; }; using ExecuteAndFinalizeHorizontalPartRuntimeContextPtr = std::shared_ptr; @@ -244,6 +254,7 @@ private: bool prepare(); bool executeImpl(); + void finalize() const; /// NOTE: Using pointer-to-member instead of std::function and lambda makes stacktraces much more concise and readable using ExecuteAndFinalizeHorizontalPartSubtasks = std::array; @@ -256,7 +267,6 @@ private: ExecuteAndFinalizeHorizontalPartSubtasks::const_iterator subtasks_iterator = subtasks.begin(); - MergeAlgorithm chooseMergeAlgorithm() const; void createMergedStream(); void extractMergingAndGatheringColumns() const; @@ -268,6 +278,7 @@ private: } StageRuntimeContextPtr getContextForNextStage() override; + ProfileEvents::Event getTotalTimeProfileEvent() const override { return ProfileEvents::MergeHorizontalStageTotalMilliseconds; } ExecuteAndFinalizeHorizontalPartRuntimeContextPtr ctx; GlobalRuntimeContextPtr global_ctx; @@ -307,6 +318,7 @@ private: QueryPipeline column_parts_pipeline; std::unique_ptr executor; std::unique_ptr rows_sources_read_buf{nullptr}; + UInt64 elapsed_execute_ns{0}; }; using VerticalMergeRuntimeContextPtr = std::shared_ptr; @@ -321,6 +333,7 @@ private: global_ctx = static_pointer_cast(global); } StageRuntimeContextPtr getContextForNextStage() override; + ProfileEvents::Event getTotalTimeProfileEvent() const override { return ProfileEvents::MergeVerticalStageTotalMilliseconds; } bool prepareVerticalMergeForAllColumns() const; bool executeVerticalMergeForAllColumns() const; @@ -361,6 +374,7 @@ private: MergeTasks::iterator projections_iterator; LoggerPtr log{getLogger("MergeTask::MergeProjectionsStage")}; + UInt64 elapsed_execute_ns{0}; }; using MergeProjectionsRuntimeContextPtr = std::shared_ptr; @@ -368,12 +382,15 @@ private: struct MergeProjectionsStage : public IStage { bool execute() override; + void setRuntimeContext(StageRuntimeContextPtr local, StageRuntimeContextPtr global) override { ctx = static_pointer_cast(local); global_ctx = static_pointer_cast(global); } - StageRuntimeContextPtr getContextForNextStage() override { return nullptr; } + + StageRuntimeContextPtr getContextForNextStage() override; + ProfileEvents::Event getTotalTimeProfileEvent() const override { return ProfileEvents::MergeProjectionStageTotalMilliseconds; } bool mergeMinMaxIndexAndPrepareProjections() const; bool executeProjections() const; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 051d52a71cd..94f6d196b99 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1146,7 +1146,7 @@ std::optional MergeTreeData::totalRowsByPartitionPredicateImpl( auto metadata_snapshot = getInMemoryMetadataPtr(); auto virtual_columns_block = getBlockWithVirtualsForFilter(metadata_snapshot, {parts[0]}); - auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag.getOutputs().at(0), nullptr); + auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag.getOutputs().at(0), nullptr, /*allow_partial_result=*/ false); if (!filter_dag) return {}; @@ -2351,7 +2351,7 @@ size_t MergeTreeData::clearOldTemporaryDirectories(const String & root_path, siz /// We don't control the amount of refs for temporary parts so we cannot decide can we remove blobs /// or not. So we are not doing it bool keep_shared = false; - if (disk->supportZeroCopyReplication() && settings->allow_remote_fs_zero_copy_replication) + if (disk->supportZeroCopyReplication() && settings->allow_remote_fs_zero_copy_replication && supportsReplication()) { LOG_WARNING(log, "Since zero-copy replication is enabled we are not going to remove blobs from shared storage for {}", full_path); keep_shared = true; @@ -3359,6 +3359,10 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context throw Exception(ErrorCodes::NOT_IMPLEMENTED, "ALTER MODIFY REFRESH is not supported by MergeTree engines family"); + if (command.type == AlterCommand::MODIFY_SQL_SECURITY) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "ALTER MODIFY SQL SECURITY is not supported by MergeTree engines family"); + if (command.type == AlterCommand::MODIFY_ORDER_BY && !is_custom_partitioned) { throw Exception(ErrorCodes::BAD_ARGUMENTS, @@ -3513,7 +3517,7 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context const auto & new_column = new_metadata.getColumns().get(command.column_name); if (!old_column.type->equals(*new_column.type)) throw Exception(ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN, - "ALTER types of column {} with statistics is not not safe " + "ALTER types of column {} with statistics is not safe " "because it can change the representation of statistics", backQuoteIfNeed(command.column_name)); } @@ -5884,7 +5888,7 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, ContextPtr loc if (partition_lit && partition_lit->value.getType() == Field::Types::String) { MergeTreePartInfo::validatePartitionID(partition_ast.value->clone(), format_version); - return partition_lit->value.get(); + return partition_lit->value.safeGet(); } } @@ -5947,7 +5951,7 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, ContextPtr loc throw Exception(ErrorCodes::INVALID_PARTITION_VALUE, "Expected tuple for complex partition key, got {}", partition_key_value.getTypeName()); - const Tuple & tuple = partition_key_value.get(); + const Tuple & tuple = partition_key_value.safeGet(); if (tuple.size() != fields_count) throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong number of fields in the partition expression: {}, must be: {}", tuple.size(), fields_count); @@ -6268,10 +6272,13 @@ void MergeTreeData::dropDetached(const ASTPtr & partition, bool part, ContextPtr } else { - String partition_id = getPartitionIDFromQuery(partition, local_context); + String partition_id; + bool all = partition->as()->all; + if (!all) + partition_id = getPartitionIDFromQuery(partition, local_context); DetachedPartsInfo detached_parts = getDetachedParts(); for (const auto & part_info : detached_parts) - if (part_info.valid_name && part_info.partition_id == partition_id + if (part_info.valid_name && (all || part_info.partition_id == partition_id) && part_info.prefix != "attaching" && part_info.prefix != "deleting") renamed_parts.addPart(part_info.dir_name, "deleting_" + part_info.dir_name, part_info.disk); } @@ -6883,7 +6890,7 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( auto * place = arena.alignedAlloc(size_of_state, align_of_state); func->create(place); if (const AggregateFunctionCount * agg_count = typeid_cast(func.get())) - AggregateFunctionCount::set(place, value.get()); + AggregateFunctionCount::set(place, value.safeGet()); else { auto value_column = func->getArgumentTypes().front()->createColumnConst(1, value)->convertToFullColumnIfConst(); @@ -6925,7 +6932,8 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( const auto * predicate = filter_dag->getOutputs().at(0); // Generate valid expressions for filtering - VirtualColumnUtils::filterBlockWithPredicate(predicate, virtual_columns_block, query_context); + VirtualColumnUtils::filterBlockWithPredicate( + predicate, virtual_columns_block, query_context, /*allow_filtering_with_partial_predicate =*/true); rows = virtual_columns_block.rows(); part_name_column = virtual_columns_block.getByName("_part").column; @@ -7520,7 +7528,7 @@ MergeTreeData::MatcherFn MergeTreeData::getPartitionMatcher(const ASTPtr & parti if (const auto * partition_lit = partition_ast->as().value->as()) { id = partition_lit->value.getType() == Field::Types::UInt64 - ? toString(partition_lit->value.get()) + ? toString(partition_lit->value.safeGet()) : partition_lit->value.safeGet(); prefixed = true; } diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp index b327480fa92..3ef36ce364c 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp @@ -100,12 +100,6 @@ void MergeTreeDataPartChecksums::checkEqual(const MergeTreeDataPartChecksums & r } } -void MergeTreeDataPartChecksums::checkSizes(const IDataPartStorage & storage) const -{ - for (const auto & [name, checksum] : files) - checksum.checkSize(storage, name); -} - UInt64 MergeTreeDataPartChecksums::getTotalSizeOnDisk() const { UInt64 res = 0; diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.h b/src/Storages/MergeTree/MergeTreeDataPartChecksum.h index 05178dc3a60..dc52f1ada2b 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.h +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.h @@ -65,9 +65,6 @@ struct MergeTreeDataPartChecksums static bool isBadChecksumsErrorCode(int code); - /// Checks that the directory contains all the needed files of the correct size. Does not check the checksum. - void checkSizes(const IDataPartStorage & storage) const; - /// Returns false if the checksum is too old. bool read(ReadBuffer & in); /// Assume that header with version (the first line) is read diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index 52d12c9db7d..f4be7619fc8 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -164,7 +164,7 @@ void writeColumnSingleGranule( serialize_settings.position_independent_encoding = true; serialize_settings.low_cardinality_max_dictionary_size = 0; serialize_settings.use_compact_variant_discriminators_serialization = settings.use_compact_variant_discriminators_serialization; - serialize_settings.dynamic_write_statistics = ISerialization::SerializeBinaryBulkSettings::DynamicStatisticsMode::PREFIX; + serialize_settings.object_and_dynamic_write_statistics = ISerialization::SerializeBinaryBulkSettings::ObjectAndDynamicStatisticsMode::PREFIX; serialization->serializeBinaryBulkStatePrefix(*column.column, serialize_settings, state); serialization->serializeBinaryBulkWithMultipleStreams(*column.column, from_row, number_of_rows, serialize_settings, state); diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index 3fbabe1dd52..3edcce74b09 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -132,6 +132,10 @@ void MergeTreeDataPartWriterWide::addStreams( { assert(!substream_path.empty()); + /// Don't create streams for ephemeral subcolumns that don't store any real data. + if (ISerialization::isEphemeralSubcolumn(substream_path, substream_path.size())) + return; + auto full_stream_name = ISerialization::getFileNameForStream(name_and_type, substream_path); String stream_name; @@ -205,6 +209,10 @@ ISerialization::OutputStreamGetter MergeTreeDataPartWriterWide::createStreamGett { return [&, this] (const ISerialization::SubstreamPath & substream_path) -> WriteBuffer * { + /// Skip ephemeral subcolumns that don't store any real data. + if (ISerialization::isEphemeralSubcolumn(substream_path, substream_path.size())) + return nullptr; + bool is_offsets = !substream_path.empty() && substream_path.back().type == ISerialization::Substream::ArraySizes; auto stream_name = getStreamName(column, substream_path); @@ -367,6 +375,10 @@ StreamsWithMarks MergeTreeDataPartWriterWide::getCurrentMarksForColumn( min_compress_block_size = settings.min_compress_block_size; getSerialization(name_and_type.name)->enumerateStreams([&] (const ISerialization::SubstreamPath & substream_path) { + /// Skip ephemeral subcolumns that don't store any real data. + if (ISerialization::isEphemeralSubcolumn(substream_path, substream_path.size())) + return; + bool is_offsets = !substream_path.empty() && substream_path.back().type == ISerialization::Substream::ArraySizes; auto stream_name = getStreamName(name_and_type, substream_path); @@ -405,6 +417,10 @@ void MergeTreeDataPartWriterWide::writeSingleGranule( /// So that instead of the marks pointing to the end of the compressed block, there were marks pointing to the beginning of the next one. serialization->enumerateStreams([&] (const ISerialization::SubstreamPath & substream_path) { + /// Skip ephemeral subcolumns that don't store any real data. + if (ISerialization::isEphemeralSubcolumn(substream_path, substream_path.size())) + return; + bool is_offsets = !substream_path.empty() && substream_path.back().type == ISerialization::Substream::ArraySizes; auto stream_name = getStreamName(name_and_type, substream_path); @@ -656,7 +672,7 @@ void MergeTreeDataPartWriterWide::fillDataChecksums(MergeTreeDataPartChecksums & if (!serialization_states.empty()) { serialize_settings.getter = createStreamGetter(*it, written_offset_columns ? *written_offset_columns : offset_columns); - serialize_settings.dynamic_write_statistics = ISerialization::SerializeBinaryBulkSettings::DynamicStatisticsMode::SUFFIX; + serialize_settings.object_and_dynamic_write_statistics = ISerialization::SerializeBinaryBulkSettings::ObjectAndDynamicStatisticsMode::SUFFIX; getSerialization(it->name)->serializeBinaryBulkStateSuffix(serialize_settings, serialization_states[it->name]); } diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index a6ef0063069..58b23152016 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -48,7 +49,6 @@ #include #include -#include namespace CurrentMetrics { @@ -369,7 +369,7 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( /// If sample and final are used together no need to calculate sampling expression twice. /// The first time it was calculated for final, because sample key is a part of the PK. /// So, assume that we already have calculated column. - ASTPtr sampling_key_ast = metadata_snapshot->getSamplingKeyAST(); + ASTPtr sampling_key_ast; if (final) { @@ -377,6 +377,12 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( /// We do spoil available_real_columns here, but it is not used later. available_real_columns.emplace_back(sampling_key.column_names[0], std::move(sampling_column_type)); } + else + { + sampling_key_ast = metadata_snapshot->getSamplingKeyAST()->clone(); + } + + chassert(sampling_key_ast != nullptr); if (has_lower_limit) { @@ -1406,11 +1412,10 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex( if (index_mark != index_range.begin || !granule || last_index_mark != index_range.begin) reader.read(granule); - auto ann_condition = std::dynamic_pointer_cast(condition); - if (ann_condition != nullptr) + if (index_helper->isVectorSimilarityIndex()) { /// An array of indices of useful ranges. - auto result = ann_condition->getUsefulRanges(granule); + auto result = condition->getUsefulRanges(granule); for (auto range : result) { diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index cb02f1cf5f2..f29d715e791 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -451,8 +451,8 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl( String part_name; if (data.format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { - DayNum min_date(minmax_idx->hyperrectangle[data.minmax_idx_date_column_pos].left.get()); - DayNum max_date(minmax_idx->hyperrectangle[data.minmax_idx_date_column_pos].right.get()); + DayNum min_date(minmax_idx->hyperrectangle[data.minmax_idx_date_column_pos].left.safeGet()); + DayNum max_date(minmax_idx->hyperrectangle[data.minmax_idx_date_column_pos].right.safeGet()); const auto & date_lut = DateLUT::serverTimezoneInstance(); diff --git a/src/Storages/MergeTree/MergeTreeIOSettings.cpp b/src/Storages/MergeTree/MergeTreeIOSettings.cpp index 58c3bd28d6a..24cb25afe47 100644 --- a/src/Storages/MergeTree/MergeTreeIOSettings.cpp +++ b/src/Storages/MergeTree/MergeTreeIOSettings.cpp @@ -27,7 +27,6 @@ MergeTreeWriterSettings::MergeTreeWriterSettings( , rewrite_primary_key(rewrite_primary_key_) , blocks_are_granules_size(blocks_are_granules_size_) , query_write_settings(query_write_settings_) - , max_threads_for_annoy_index_creation(global_settings.max_threads_for_annoy_index_creation) , low_cardinality_max_dictionary_size(global_settings.low_cardinality_max_dictionary_size) , low_cardinality_use_single_dictionary_for_part(global_settings.low_cardinality_use_single_dictionary_for_part != 0) , use_compact_variant_discriminators_serialization(storage_settings->use_compact_variant_discriminators_serialization) diff --git a/src/Storages/MergeTree/MergeTreeIOSettings.h b/src/Storages/MergeTree/MergeTreeIOSettings.h index c79ca1e66ee..47b174b2e29 100644 --- a/src/Storages/MergeTree/MergeTreeIOSettings.h +++ b/src/Storages/MergeTree/MergeTreeIOSettings.h @@ -77,8 +77,6 @@ struct MergeTreeWriterSettings bool blocks_are_granules_size; WriteSettings query_write_settings; - size_t max_threads_for_annoy_index_creation; - size_t low_cardinality_max_dictionary_size; bool low_cardinality_use_single_dictionary_for_part; bool use_compact_variant_discriminators_serialization; diff --git a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp deleted file mode 100644 index 497e86334f3..00000000000 --- a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp +++ /dev/null @@ -1,416 +0,0 @@ -#ifdef ENABLE_ANNOY - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int ILLEGAL_COLUMN; - extern const int INCORRECT_DATA; - extern const int INCORRECT_NUMBER_OF_COLUMNS; - extern const int INCORRECT_QUERY; - extern const int LOGICAL_ERROR; - extern const int NOT_IMPLEMENTED; -} - -template -AnnoyIndexWithSerialization::AnnoyIndexWithSerialization(size_t dimensions) - : Base::AnnoyIndex(static_cast(dimensions)) -{ -} - -template -void AnnoyIndexWithSerialization::serialize(WriteBuffer & ostr) const -{ - chassert(Base::_built); - writeIntBinary(Base::_s, ostr); - writeIntBinary(Base::_n_items, ostr); - writeIntBinary(Base::_n_nodes, ostr); - writeIntBinary(Base::_nodes_size, ostr); - writeIntBinary(Base::_K, ostr); - writeIntBinary(Base::_seed, ostr); - writeVectorBinary(Base::_roots, ostr); - ostr.write(reinterpret_cast(Base::_nodes), Base::_s * Base::_n_nodes); -} - -template -void AnnoyIndexWithSerialization::deserialize(ReadBuffer & istr) -{ - chassert(!Base::_built); - readIntBinary(Base::_s, istr); - readIntBinary(Base::_n_items, istr); - readIntBinary(Base::_n_nodes, istr); - readIntBinary(Base::_nodes_size, istr); - readIntBinary(Base::_K, istr); - readIntBinary(Base::_seed, istr); - readVectorBinary(Base::_roots, istr); - Base::_nodes = realloc(Base::_nodes, Base::_s * Base::_n_nodes); - istr.readStrict(reinterpret_cast(Base::_nodes), Base::_s * Base::_n_nodes); - - Base::_fd = 0; - // set flags - Base::_loaded = false; - Base::_verbose = false; - Base::_on_disk = false; - Base::_built = true; -} - -template -size_t AnnoyIndexWithSerialization::getDimensions() const -{ - return Base::get_f(); -} - - -template -MergeTreeIndexGranuleAnnoy::MergeTreeIndexGranuleAnnoy(const String & index_name_, const Block & index_sample_block_) - : index_name(index_name_) - , index_sample_block(index_sample_block_) - , index(nullptr) -{} - -template -MergeTreeIndexGranuleAnnoy::MergeTreeIndexGranuleAnnoy( - const String & index_name_, - const Block & index_sample_block_, - AnnoyIndexWithSerializationPtr index_) - : index_name(index_name_) - , index_sample_block(index_sample_block_) - , index(std::move(index_)) -{} - -template -void MergeTreeIndexGranuleAnnoy::serializeBinary(WriteBuffer & ostr) const -{ - /// Number of dimensions is required in the index constructor, - /// so it must be written and read separately from the other part - writeIntBinary(static_cast(index->getDimensions()), ostr); // write dimension - index->serialize(ostr); -} - -template -void MergeTreeIndexGranuleAnnoy::deserializeBinary(ReadBuffer & istr, MergeTreeIndexVersion /*version*/) -{ - UInt64 dimension; - readIntBinary(dimension, istr); - index = std::make_shared>(dimension); - index->deserialize(istr); -} - -template -MergeTreeIndexAggregatorAnnoy::MergeTreeIndexAggregatorAnnoy( - const String & index_name_, - const Block & index_sample_block_, - UInt64 trees_, - size_t max_threads_for_creation_) - : index_name(index_name_) - , index_sample_block(index_sample_block_) - , trees(trees_) - , max_threads_for_creation(max_threads_for_creation_) -{} - -template -MergeTreeIndexGranulePtr MergeTreeIndexAggregatorAnnoy::getGranuleAndReset() -{ - int threads = (max_threads_for_creation == 0) ? -1 : static_cast(max_threads_for_creation); - /// clang-tidy reports a false positive: it considers %p with an outdated pointer in fprintf() (used by logging which we don't do) dereferencing - index->build(static_cast(trees), threads); - auto granule = std::make_shared>(index_name, index_sample_block, index); - index = nullptr; - return granule; -} - -template -void MergeTreeIndexAggregatorAnnoy::update(const Block & block, size_t * pos, size_t limit) -{ - if (*pos >= block.rows()) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "The provided position is not less than the number of block rows. Position: {}, Block rows: {}.", - *pos, block.rows()); - - size_t rows_read = std::min(limit, block.rows() - *pos); - - if (rows_read == 0) - return; - - if (rows_read > std::numeric_limits::max()) - throw Exception(ErrorCodes::INCORRECT_DATA, "Index granularity is too big: more than 4B rows per index granule."); - - if (index_sample_block.columns() > 1) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected block with single column"); - - const String & index_column_name = index_sample_block.getByPosition(0).name; - ColumnPtr column_cut = block.getByName(index_column_name).column->cut(*pos, rows_read); - - if (const auto & column_array = typeid_cast(column_cut.get())) - { - const auto & column_array_data = column_array->getData(); - const auto & column_array_data_float = typeid_cast(column_array_data); - const auto & column_array_data_float_data = column_array_data_float.getData(); - - const auto & column_array_offsets = column_array->getOffsets(); - const size_t num_rows = column_array_offsets.size(); - - if (column_array->empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Array is unexpectedly empty"); - - /// The Annoy algorithm naturally assumes that the indexed vectors have dimension >= 1. This condition is violated if empty arrays - /// are INSERTed into an Annoy-indexed column or if no value was specified at all in which case the arrays take on their default - /// value which is also empty. - if (column_array->isDefaultAt(0)) - throw Exception(ErrorCodes::INCORRECT_DATA, "The arrays in column '{}' must not be empty. Did you try to INSERT default values?", index_column_name); - - /// Check all sizes are the same - size_t dimension = column_array_offsets[0]; - for (size_t i = 0; i < num_rows - 1; ++i) - if (column_array_offsets[i + 1] - column_array_offsets[i] != dimension) - throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column '{}' must have equal length", index_column_name); - - /// Also check that previously inserted blocks have the same size as this block. - /// Note that this guarantees consistency of dimension only within parts. We are unable to detect inconsistent dimensions across - /// parts - for this, a little help from the user is needed, e.g. CONSTRAINT cnstr CHECK length(array) = 42. - if (index && index->getDimensions() != dimension) - throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column '{}' must have equal length", index_column_name); - - if (!index) - index = std::make_shared>(dimension); - - /// Add all rows of block - index->add_item(index->get_n_items(), column_array_data_float_data.data()); - for (size_t current_row = 1; current_row < num_rows; ++current_row) - index->add_item(index->get_n_items(), &column_array_data_float_data[column_array_offsets[current_row - 1]]); - } - else if (const auto & column_tuple = typeid_cast(column_cut.get())) - { - const auto & column_tuple_columns = column_tuple->getColumns(); - - /// TODO check if calling index->add_item() directly on the block's tuples is faster than materializing everything - std::vector> data(column_tuple->size(), std::vector()); - for (const auto & column : column_tuple_columns) - { - const auto & pod_array = typeid_cast(column.get())->getData(); - for (size_t i = 0; i < pod_array.size(); ++i) - data[i].push_back(pod_array[i]); - } - - if (data.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Tuple has 0 rows, {} rows expected", rows_read); - - if (!index) - index = std::make_shared>(data[0].size()); - - for (const auto & item : data) - index->add_item(index->get_n_items(), item.data()); - } - else - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected Array or Tuple column"); - - *pos += rows_read; -} - - -MergeTreeIndexConditionAnnoy::MergeTreeIndexConditionAnnoy( - const IndexDescription & /*index_description*/, - const SelectQueryInfo & query, - const String & distance_function_, - ContextPtr context) - : ann_condition(query, context) - , distance_function(distance_function_) - , search_k(context->getSettingsRef().annoy_index_search_k_nodes) -{} - -bool MergeTreeIndexConditionAnnoy::mayBeTrueOnGranule(MergeTreeIndexGranulePtr /*idx_granule*/) const -{ - throw Exception(ErrorCodes::LOGICAL_ERROR, "mayBeTrueOnGranule is not supported for ANN skip indexes"); -} - -bool MergeTreeIndexConditionAnnoy::alwaysUnknownOrTrue() const -{ - return ann_condition.alwaysUnknownOrTrue(distance_function); -} - -std::vector MergeTreeIndexConditionAnnoy::getUsefulRanges(MergeTreeIndexGranulePtr idx_granule) const -{ - if (distance_function == DISTANCE_FUNCTION_L2) - return getUsefulRangesImpl(idx_granule); - else if (distance_function == DISTANCE_FUNCTION_COSINE) - return getUsefulRangesImpl(idx_granule); - std::unreachable(); -} - -template -std::vector MergeTreeIndexConditionAnnoy::getUsefulRangesImpl(MergeTreeIndexGranulePtr idx_granule) const -{ - const UInt64 limit = ann_condition.getLimit(); - const UInt64 index_granularity = ann_condition.getIndexGranularity(); - const std::optional comparison_distance = ann_condition.getQueryType() == ApproximateNearestNeighborInformation::Type::Where - ? std::optional(ann_condition.getComparisonDistanceForWhereQuery()) - : std::nullopt; - - if (comparison_distance && comparison_distance.value() < 0) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to optimize query with where without distance"); - - const std::vector reference_vector = ann_condition.getReferenceVector(); - - const auto granule = std::dynamic_pointer_cast>(idx_granule); - if (granule == nullptr) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Granule has the wrong type"); - - const AnnoyIndexWithSerializationPtr annoy = granule->index; - - if (ann_condition.getDimensions() != annoy->getDimensions()) - throw Exception(ErrorCodes::INCORRECT_QUERY, "The dimension of the space in the request ({}) " - "does not match the dimension in the index ({})", - ann_condition.getDimensions(), annoy->getDimensions()); - - std::vector neighbors; /// indexes of dots which were closest to the reference vector - std::vector distances; - neighbors.reserve(limit); - distances.reserve(limit); - - annoy->get_nns_by_vector(reference_vector.data(), limit, static_cast(search_k), &neighbors, &distances); - - chassert(neighbors.size() == distances.size()); - - std::vector granules; - granules.reserve(neighbors.size()); - for (size_t i = 0; i < neighbors.size(); ++i) - { - if (comparison_distance && distances[i] > comparison_distance) - continue; - granules.push_back(neighbors[i] / index_granularity); - } - - /// make unique - std::sort(granules.begin(), granules.end()); - granules.erase(std::unique(granules.begin(), granules.end()), granules.end()); - - return granules; -} - -MergeTreeIndexAnnoy::MergeTreeIndexAnnoy(const IndexDescription & index_, UInt64 trees_, const String & distance_function_) - : IMergeTreeIndex(index_) - , trees(trees_) - , distance_function(distance_function_) -{} - -MergeTreeIndexGranulePtr MergeTreeIndexAnnoy::createIndexGranule() const -{ - if (distance_function == DISTANCE_FUNCTION_L2) - return std::make_shared>(index.name, index.sample_block); - else if (distance_function == DISTANCE_FUNCTION_COSINE) - return std::make_shared>(index.name, index.sample_block); - std::unreachable(); -} - -MergeTreeIndexAggregatorPtr MergeTreeIndexAnnoy::createIndexAggregator(const MergeTreeWriterSettings & settings) const -{ - /// TODO: Support more metrics. Available metrics: https://github.com/spotify/annoy/blob/master/src/annoymodule.cc#L151-L171 - if (distance_function == DISTANCE_FUNCTION_L2) - return std::make_shared>(index.name, index.sample_block, trees, settings.max_threads_for_annoy_index_creation); - else if (distance_function == DISTANCE_FUNCTION_COSINE) - return std::make_shared>(index.name, index.sample_block, trees, settings.max_threads_for_annoy_index_creation); - std::unreachable(); -} - -MergeTreeIndexConditionPtr MergeTreeIndexAnnoy::createIndexCondition(const SelectQueryInfo & query, ContextPtr context) const -{ - return std::make_shared(index, query, distance_function, context); -}; - -MergeTreeIndexConditionPtr MergeTreeIndexAnnoy::createIndexCondition(const ActionsDAG *, ContextPtr) const -{ - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeTreeIndexAnnoy cannot be created with ActionsDAG"); -} - -MergeTreeIndexPtr annoyIndexCreator(const IndexDescription & index) -{ - static constexpr auto DEFAULT_DISTANCE_FUNCTION = DISTANCE_FUNCTION_L2; - String distance_function = DEFAULT_DISTANCE_FUNCTION; - if (!index.arguments.empty()) - distance_function = index.arguments[0].get(); - - static constexpr auto DEFAULT_TREES = 100uz; - UInt64 trees = DEFAULT_TREES; - if (index.arguments.size() > 1) - trees = index.arguments[1].get(); - - return std::make_shared(index, trees, distance_function); -} - -void annoyIndexValidator(const IndexDescription & index, bool /* attach */) -{ - /// Check number and type of Annoy index arguments: - - if (index.arguments.size() > 2) - throw Exception(ErrorCodes::INCORRECT_QUERY, "Annoy index must not have more than two parameters"); - - if (!index.arguments.empty() && index.arguments[0].getType() != Field::Types::String) - throw Exception(ErrorCodes::INCORRECT_QUERY, "Distance function argument of Annoy index must be of type String"); - - if (index.arguments.size() > 1 && index.arguments[1].getType() != Field::Types::UInt64) - throw Exception(ErrorCodes::INCORRECT_QUERY, "Number of trees argument of Annoy index must be of type UInt64"); - - /// Check that the index is created on a single column - - if (index.column_names.size() != 1 || index.data_types.size() != 1) - throw Exception(ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS, "Annoy indexes must be created on a single column"); - - /// Check that a supported metric was passed as first argument - - if (!index.arguments.empty()) - { - String distance_name = index.arguments[0].get(); - if (distance_name != DISTANCE_FUNCTION_L2 && distance_name != DISTANCE_FUNCTION_COSINE) - throw Exception(ErrorCodes::INCORRECT_DATA, "Annoy index only supports distance functions '{}' and '{}'", DISTANCE_FUNCTION_L2, DISTANCE_FUNCTION_COSINE); - } - - /// Check data type of indexed column: - - auto throw_unsupported_underlying_column_exception = []() - { - throw Exception( - ErrorCodes::ILLEGAL_COLUMN, - "Annoy indexes can only be created on columns of type Array(Float32) and Tuple(Float32[, Float32[, ...]])"); - }; - - DataTypePtr data_type = index.sample_block.getDataTypes()[0]; - - if (const auto * data_type_array = typeid_cast(data_type.get())) - { - TypeIndex nested_type_index = data_type_array->getNestedType()->getTypeId(); - if (!WhichDataType(nested_type_index).isFloat32()) - throw_unsupported_underlying_column_exception(); - } - else if (const auto * data_type_tuple = typeid_cast(data_type.get())) - { - const DataTypes & inner_types = data_type_tuple->getElements(); - for (const auto & inner_type : inner_types) - { - TypeIndex nested_type_index = inner_type->getTypeId(); - if (!WhichDataType(nested_type_index).isFloat32()) - throw_unsupported_underlying_column_exception(); - } - } - else - throw_unsupported_underlying_column_exception(); -} - -} - -#endif diff --git a/src/Storages/MergeTree/MergeTreeIndexAnnoy.h b/src/Storages/MergeTree/MergeTreeIndexAnnoy.h deleted file mode 100644 index 282920c608e..00000000000 --- a/src/Storages/MergeTree/MergeTreeIndexAnnoy.h +++ /dev/null @@ -1,112 +0,0 @@ -#pragma once - -#ifdef ENABLE_ANNOY - -#include - -#include -#include - -namespace DB -{ - -template -class AnnoyIndexWithSerialization : public Annoy::AnnoyIndex -{ - using Base = Annoy::AnnoyIndex; - -public: - explicit AnnoyIndexWithSerialization(size_t dimensions); - void serialize(WriteBuffer & ostr) const; - void deserialize(ReadBuffer & istr); - size_t getDimensions() const; -}; - -template -using AnnoyIndexWithSerializationPtr = std::shared_ptr>; - - -template -struct MergeTreeIndexGranuleAnnoy final : public IMergeTreeIndexGranule -{ - MergeTreeIndexGranuleAnnoy(const String & index_name_, const Block & index_sample_block_); - MergeTreeIndexGranuleAnnoy(const String & index_name_, const Block & index_sample_block_, AnnoyIndexWithSerializationPtr index_); - - ~MergeTreeIndexGranuleAnnoy() override = default; - - void serializeBinary(WriteBuffer & ostr) const override; - void deserializeBinary(ReadBuffer & istr, MergeTreeIndexVersion version) override; - - bool empty() const override { return !index.get(); } - - const String index_name; - const Block index_sample_block; - AnnoyIndexWithSerializationPtr index; -}; - - -template -struct MergeTreeIndexAggregatorAnnoy final : IMergeTreeIndexAggregator -{ - MergeTreeIndexAggregatorAnnoy(const String & index_name_, const Block & index_sample_block, UInt64 trees, size_t max_threads_for_creation); - ~MergeTreeIndexAggregatorAnnoy() override = default; - - bool empty() const override { return !index || index->get_n_items() == 0; } - MergeTreeIndexGranulePtr getGranuleAndReset() override; - void update(const Block & block, size_t * pos, size_t limit) override; - - const String index_name; - const Block index_sample_block; - const UInt64 trees; - const size_t max_threads_for_creation; - AnnoyIndexWithSerializationPtr index; -}; - - -class MergeTreeIndexConditionAnnoy final : public IMergeTreeIndexConditionApproximateNearestNeighbor -{ -public: - MergeTreeIndexConditionAnnoy( - const IndexDescription & index_description, - const SelectQueryInfo & query, - const String & distance_function, - ContextPtr context); - - ~MergeTreeIndexConditionAnnoy() override = default; - - bool alwaysUnknownOrTrue() const override; - bool mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const override; - std::vector getUsefulRanges(MergeTreeIndexGranulePtr idx_granule) const override; - -private: - template - std::vector getUsefulRangesImpl(MergeTreeIndexGranulePtr idx_granule) const; - - const ApproximateNearestNeighborCondition ann_condition; - const String distance_function; - const Int64 search_k; -}; - - -class MergeTreeIndexAnnoy final : public IMergeTreeIndex -{ -public: - - MergeTreeIndexAnnoy(const IndexDescription & index_, UInt64 trees_, const String & distance_function_); - - ~MergeTreeIndexAnnoy() override = default; - - MergeTreeIndexGranulePtr createIndexGranule() const override; - MergeTreeIndexAggregatorPtr createIndexAggregator(const MergeTreeWriterSettings & settings) const override; - MergeTreeIndexConditionPtr createIndexCondition(const SelectQueryInfo & query, ContextPtr context) const; - MergeTreeIndexConditionPtr createIndexCondition(const ActionsDAG *, ContextPtr) const override; - bool isVectorSearch() const override { return true; } - -private: - const UInt64 trees; - const String distance_function; -}; - -} - -#endif diff --git a/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp b/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp index dc314ce53d4..b796ed7114e 100644 --- a/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp @@ -348,19 +348,19 @@ bool MergeTreeIndexConditionBloomFilter::extractAtomFromTree(const RPNBuilderTre { if (const_value.getType() == Field::Types::UInt64) { - out.function = const_value.get() ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; + out.function = const_value.safeGet() ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; return true; } if (const_value.getType() == Field::Types::Int64) { - out.function = const_value.get() ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; + out.function = const_value.safeGet() ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; return true; } if (const_value.getType() == Field::Types::Float64) { - out.function = const_value.get() != 0.0 ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; + out.function = const_value.safeGet() != 0.0 ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; return true; } } @@ -703,7 +703,7 @@ bool MergeTreeIndexConditionBloomFilter::traverseTreeEquals( const bool is_nullable = actual_type->isNullable(); auto mutable_column = actual_type->createColumn(); - for (const auto & f : value_field.get()) + for (const auto & f : value_field.safeGet()) { if ((f.isNull() && !is_nullable) || f.isDecimal(f.getType())) /// NOLINT(readability-static-accessed-through-instance) return false; @@ -774,7 +774,7 @@ bool MergeTreeIndexConditionBloomFilter::traverseTreeEquals( if (which.isTuple() && key_node_function_name == "tuple") { - const Tuple & tuple = value_field.get(); + const Tuple & tuple = value_field.safeGet(); const auto * value_tuple_data_type = typeid_cast(value_type.get()); if (tuple.size() != key_node_function_arguments_size) @@ -963,7 +963,7 @@ void bloomFilterIndexValidator(const IndexDescription & index, bool attach) { const auto & argument = index.arguments[0]; - if (!attach && (argument.getType() != Field::Types::Float64 || argument.get() < 0 || argument.get() > 1)) + if (!attach && (argument.getType() != Field::Types::Float64 || argument.safeGet() < 0 || argument.safeGet() > 1)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "The BloomFilter false positive must be a double number between 0 and 1."); } } diff --git a/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp b/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp index 5b6813d12e3..857b7903588 100644 --- a/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp @@ -341,19 +341,19 @@ bool MergeTreeConditionBloomFilterText::extractAtomFromTree(const RPNBuilderTree if (const_value.getType() == Field::Types::UInt64) { - out.function = const_value.get() ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; + out.function = const_value.safeGet() ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; return true; } if (const_value.getType() == Field::Types::Int64) { - out.function = const_value.get() ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; + out.function = const_value.safeGet() ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; return true; } if (const_value.getType() == Field::Types::Float64) { - out.function = const_value.get() != 0.0 ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; + out.function = const_value.safeGet() != 0.0 ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; return true; } } @@ -493,7 +493,7 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals( out.function = RPNElement::FUNCTION_EQUALS; out.bloom_filter = std::make_unique(params); - auto value = const_value.get(); + auto value = const_value.safeGet(); if (is_case_insensitive_scenario) std::ranges::transform(value, value.begin(), [](const auto & c) { return static_cast(std::tolower(c)); }); @@ -509,7 +509,7 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals( out.key_column = *key_index; out.function = RPNElement::FUNCTION_HAS; out.bloom_filter = std::make_unique(params); - auto & value = const_value.get(); + auto & value = const_value.safeGet(); token_extractor->stringToBloomFilter(value.data(), value.size(), *out.bloom_filter); return true; } @@ -519,7 +519,7 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals( out.key_column = *key_index; out.function = RPNElement::FUNCTION_HAS; out.bloom_filter = std::make_unique(params); - auto & value = const_value.get(); + auto & value = const_value.safeGet(); token_extractor->stringToBloomFilter(value.data(), value.size(), *out.bloom_filter); return true; } @@ -529,7 +529,7 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals( out.key_column = *key_index; out.function = RPNElement::FUNCTION_NOT_EQUALS; out.bloom_filter = std::make_unique(params); - const auto & value = const_value.get(); + const auto & value = const_value.safeGet(); token_extractor->stringToBloomFilter(value.data(), value.size(), *out.bloom_filter); return true; } @@ -538,7 +538,7 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals( out.key_column = *key_index; out.function = RPNElement::FUNCTION_EQUALS; out.bloom_filter = std::make_unique(params); - const auto & value = const_value.get(); + const auto & value = const_value.safeGet(); token_extractor->stringToBloomFilter(value.data(), value.size(), *out.bloom_filter); return true; } @@ -547,7 +547,7 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals( out.key_column = *key_index; out.function = RPNElement::FUNCTION_EQUALS; out.bloom_filter = std::make_unique(params); - const auto & value = const_value.get(); + const auto & value = const_value.safeGet(); token_extractor->stringLikeToBloomFilter(value.data(), value.size(), *out.bloom_filter); return true; } @@ -556,7 +556,7 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals( out.key_column = *key_index; out.function = RPNElement::FUNCTION_NOT_EQUALS; out.bloom_filter = std::make_unique(params); - const auto & value = const_value.get(); + const auto & value = const_value.safeGet(); token_extractor->stringLikeToBloomFilter(value.data(), value.size(), *out.bloom_filter); return true; } @@ -565,7 +565,7 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals( out.key_column = *key_index; out.function = RPNElement::FUNCTION_EQUALS; out.bloom_filter = std::make_unique(params); - const auto & value = const_value.get(); + const auto & value = const_value.safeGet(); token_extractor->substringToBloomFilter(value.data(), value.size(), *out.bloom_filter, true, false); return true; } @@ -574,7 +574,7 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals( out.key_column = *key_index; out.function = RPNElement::FUNCTION_EQUALS; out.bloom_filter = std::make_unique(params); - const auto & value = const_value.get(); + const auto & value = const_value.safeGet(); token_extractor->substringToBloomFilter(value.data(), value.size(), *out.bloom_filter, false, true); return true; } @@ -589,13 +589,13 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals( /// 2d vector is not needed here but is used because already exists for FUNCTION_IN std::vector> bloom_filters; bloom_filters.emplace_back(); - for (const auto & element : const_value.get()) + for (const auto & element : const_value.safeGet()) { if (element.getType() != Field::Types::String) return false; bloom_filters.back().emplace_back(params); - const auto & value = element.get(); + const auto & value = element.safeGet(); if (function_name == "multiSearchAny") { @@ -615,7 +615,7 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals( out.function = RPNElement::FUNCTION_MATCH; out.bloom_filter = std::make_unique(params); - auto & value = const_value.get(); + auto & value = const_value.safeGet(); String required_substring; bool dummy_is_trivial, dummy_required_substring_is_prefix; std::vector alternatives; @@ -743,11 +743,11 @@ MergeTreeIndexPtr bloomFilterIndexTextCreator( { if (index.type == NgramTokenExtractor::getName()) { - size_t n = index.arguments[0].get(); + size_t n = index.arguments[0].safeGet(); BloomFilterParameters params( - index.arguments[1].get(), - index.arguments[2].get(), - index.arguments[3].get()); + index.arguments[1].safeGet(), + index.arguments[2].safeGet(), + index.arguments[3].safeGet()); auto tokenizer = std::make_unique(n); @@ -756,9 +756,9 @@ MergeTreeIndexPtr bloomFilterIndexTextCreator( else if (index.type == SplitTokenExtractor::getName()) { BloomFilterParameters params( - index.arguments[0].get(), - index.arguments[1].get(), - index.arguments[2].get()); + index.arguments[0].safeGet(), + index.arguments[1].safeGet(), + index.arguments[2].safeGet()); auto tokenizer = std::make_unique(); @@ -815,9 +815,9 @@ void bloomFilterIndexTextValidator(const IndexDescription & index, bool /*attach /// Just validate BloomFilterParameters params( - index.arguments[0].get(), - index.arguments[1].get(), - index.arguments[2].get()); + index.arguments[0].safeGet(), + index.arguments[1].safeGet(), + index.arguments[2].safeGet()); } } diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp index cd6af68ebcc..b5c6bb95d37 100644 --- a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp @@ -74,7 +74,7 @@ void MergeTreeIndexGranuleFullText::deserializeBinary(ReadBuffer & istr, MergeTr for (auto & gin_filter : gin_filters) { size_serialization->deserializeBinary(field_rows, istr, {}); - size_t filter_size = field_rows.get(); + size_t filter_size = field_rows.safeGet(); gin_filter.getFilter().resize(filter_size); if (filter_size == 0) @@ -379,19 +379,19 @@ bool MergeTreeConditionFullText::traverseAtomAST(const RPNBuilderTreeNode & node /// Check constant like in KeyCondition if (const_value.getType() == Field::Types::UInt64) { - out.function = const_value.get() ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; + out.function = const_value.safeGet() ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; return true; } if (const_value.getType() == Field::Types::Int64) { - out.function = const_value.get() ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; + out.function = const_value.safeGet() ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; return true; } if (const_value.getType() == Field::Types::Float64) { - out.function = const_value.get() != 0.00 ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; + out.function = const_value.safeGet() != 0.00 ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; return true; } } @@ -530,7 +530,7 @@ bool MergeTreeConditionFullText::traverseASTEquals( out.key_column = key_column_num; out.function = RPNElement::FUNCTION_HAS; out.gin_filter = std::make_unique(params); - auto & value = const_value.get(); + auto & value = const_value.safeGet(); token_extractor->stringToGinFilter(value.data(), value.size(), *out.gin_filter); return true; } @@ -539,7 +539,7 @@ bool MergeTreeConditionFullText::traverseASTEquals( out.key_column = key_column_num; out.function = RPNElement::FUNCTION_HAS; out.gin_filter = std::make_unique(params); - auto & value = const_value.get(); + auto & value = const_value.safeGet(); token_extractor->stringToGinFilter(value.data(), value.size(), *out.gin_filter); return true; } @@ -549,7 +549,7 @@ bool MergeTreeConditionFullText::traverseASTEquals( out.key_column = key_column_num; out.function = RPNElement::FUNCTION_NOT_EQUALS; out.gin_filter = std::make_unique(params); - const auto & value = const_value.get(); + const auto & value = const_value.safeGet(); token_extractor->stringToGinFilter(value.data(), value.size(), *out.gin_filter); return true; } @@ -558,7 +558,7 @@ bool MergeTreeConditionFullText::traverseASTEquals( out.key_column = key_column_num; out.function = RPNElement::FUNCTION_EQUALS; out.gin_filter = std::make_unique(params); - const auto & value = const_value.get(); + const auto & value = const_value.safeGet(); token_extractor->stringToGinFilter(value.data(), value.size(), *out.gin_filter); return true; } @@ -567,7 +567,7 @@ bool MergeTreeConditionFullText::traverseASTEquals( out.key_column = key_column_num; out.function = RPNElement::FUNCTION_EQUALS; out.gin_filter = std::make_unique(params); - const auto & value = const_value.get(); + const auto & value = const_value.safeGet(); token_extractor->stringLikeToGinFilter(value.data(), value.size(), *out.gin_filter); return true; } @@ -576,7 +576,7 @@ bool MergeTreeConditionFullText::traverseASTEquals( out.key_column = key_column_num; out.function = RPNElement::FUNCTION_NOT_EQUALS; out.gin_filter = std::make_unique(params); - const auto & value = const_value.get(); + const auto & value = const_value.safeGet(); token_extractor->stringLikeToGinFilter(value.data(), value.size(), *out.gin_filter); return true; } @@ -585,7 +585,7 @@ bool MergeTreeConditionFullText::traverseASTEquals( out.key_column = key_column_num; out.function = RPNElement::FUNCTION_EQUALS; out.gin_filter = std::make_unique(params); - const auto & value = const_value.get(); + const auto & value = const_value.safeGet(); token_extractor->stringToGinFilter(value.data(), value.size(), *out.gin_filter); return true; } @@ -594,7 +594,7 @@ bool MergeTreeConditionFullText::traverseASTEquals( out.key_column = key_column_num; out.function = RPNElement::FUNCTION_EQUALS; out.gin_filter = std::make_unique(params); - const auto & value = const_value.get(); + const auto & value = const_value.safeGet(); token_extractor->substringToGinFilter(value.data(), value.size(), *out.gin_filter, true, false); return true; } @@ -603,7 +603,7 @@ bool MergeTreeConditionFullText::traverseASTEquals( out.key_column = key_column_num; out.function = RPNElement::FUNCTION_EQUALS; out.gin_filter = std::make_unique(params); - const auto & value = const_value.get(); + const auto & value = const_value.safeGet(); token_extractor->substringToGinFilter(value.data(), value.size(), *out.gin_filter, false, true); return true; } @@ -615,13 +615,13 @@ bool MergeTreeConditionFullText::traverseASTEquals( /// 2d vector is not needed here but is used because already exists for FUNCTION_IN std::vector gin_filters; gin_filters.emplace_back(); - for (const auto & element : const_value.get()) + for (const auto & element : const_value.safeGet()) { if (element.getType() != Field::Types::String) return false; gin_filters.back().emplace_back(params); - const auto & value = element.get(); + const auto & value = element.safeGet(); token_extractor->substringToGinFilter(value.data(), value.size(), gin_filters.back().back(), false, false); } out.set_gin_filters = std::move(gin_filters); @@ -632,7 +632,7 @@ bool MergeTreeConditionFullText::traverseASTEquals( out.key_column = key_column_num; out.function = RPNElement::FUNCTION_MATCH; - auto & value = const_value.get(); + auto & value = const_value.safeGet(); String required_substring; bool dummy_is_trivial, dummy_required_substring_is_prefix; std::vector alternatives; @@ -776,8 +776,8 @@ MergeTreeIndexConditionPtr MergeTreeIndexFullText::createIndexCondition( MergeTreeIndexPtr fullTextIndexCreator( const IndexDescription & index) { - size_t n = index.arguments.empty() ? 0 : index.arguments[0].get(); - UInt64 max_rows = index.arguments.size() < 2 ? DEFAULT_MAX_ROWS_PER_POSTINGS_LIST : index.arguments[1].get(); + size_t n = index.arguments.empty() ? 0 : index.arguments[0].safeGet(); + UInt64 max_rows = index.arguments.size() < 2 ? DEFAULT_MAX_ROWS_PER_POSTINGS_LIST : index.arguments[1].safeGet(); GinFilterParameters params(n, max_rows); /// Use SplitTokenExtractor when n is 0, otherwise use NgramTokenExtractor @@ -826,12 +826,12 @@ void fullTextIndexValidator(const IndexDescription & index, bool /*attach*/) { if (index.arguments[1].getType() != Field::Types::UInt64) throw Exception(ErrorCodes::INCORRECT_QUERY, "The second full text index argument must be UInt64"); - if (index.arguments[1].get() != UNLIMITED_ROWS_PER_POSTINGS_LIST && index.arguments[1].get() < MIN_ROWS_PER_POSTINGS_LIST) + if (index.arguments[1].safeGet() != UNLIMITED_ROWS_PER_POSTINGS_LIST && index.arguments[1].safeGet() < MIN_ROWS_PER_POSTINGS_LIST) throw Exception(ErrorCodes::INCORRECT_QUERY, "The maximum rows per postings list must be no less than {}", MIN_ROWS_PER_POSTINGS_LIST); } /// Just validate - size_t ngrams = index.arguments.empty() ? 0 : index.arguments[0].get(); - UInt64 max_rows_per_postings_list = index.arguments.size() < 2 ? DEFAULT_MAX_ROWS_PER_POSTINGS_LIST : index.arguments[1].get(); + size_t ngrams = index.arguments.empty() ? 0 : index.arguments[0].safeGet(); + UInt64 max_rows_per_postings_list = index.arguments.size() < 2 ? DEFAULT_MAX_ROWS_PER_POSTINGS_LIST : index.arguments[1].safeGet(); GinFilterParameters params(ngrams, max_rows_per_postings_list); } } diff --git a/src/Storages/MergeTree/MergeTreeIndexGranularity.cpp b/src/Storages/MergeTree/MergeTreeIndexGranularity.cpp index 2a45ab1d927..2b924284857 100644 --- a/src/Storages/MergeTree/MergeTreeIndexGranularity.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexGranularity.cpp @@ -103,8 +103,19 @@ size_t MergeTreeIndexGranularity::countMarksForRows(size_t from_mark, size_t num /// This is a heuristic to respect min_marks_to_read which is ignored by MergeTreeReadPool in case of remote disk. /// See comment in IMergeTreeSelectAlgorithm. - if (min_marks_to_read && from_mark + 2 * min_marks_to_read <= to_mark) - to_mark = from_mark + min_marks_to_read; + if (min_marks_to_read) + { + // check overflow + size_t min_marks_to_read_2 = 0; + bool overflow = common::mulOverflow(min_marks_to_read, 2, min_marks_to_read_2); + + size_t to_mark_overwrite = 0; + if (!overflow) + overflow = common::addOverflow(from_mark, min_marks_to_read_2, to_mark_overwrite); + + if (!overflow && to_mark_overwrite < to_mark) + to_mark = to_mark_overwrite; + } return getRowsCountInRange(from_mark, std::max(1UL, to_mark)) - offset_in_rows; } diff --git a/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp b/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp index cd8065ecadf..abf3ae56376 100644 --- a/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp @@ -37,7 +37,7 @@ void MergeTreeIndexGranuleHypothesis::deserializeBinary(ReadBuffer & istr, Merge Field field_met; const auto & size_type = DataTypePtr(std::make_shared()); size_type->getDefaultSerialization()->deserializeBinary(field_met, istr, {}); - met = field_met.get(); + met = field_met.safeGet(); is_empty = false; } diff --git a/src/Storages/MergeTree/MergeTreeIndexLegacyVectorSimilarity.cpp b/src/Storages/MergeTree/MergeTreeIndexLegacyVectorSimilarity.cpp new file mode 100644 index 00000000000..29de109d4fc --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeIndexLegacyVectorSimilarity.cpp @@ -0,0 +1,45 @@ +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_INDEX; +} + +MergeTreeIndexLegacyVectorSimilarity::MergeTreeIndexLegacyVectorSimilarity(const IndexDescription & index_) + : IMergeTreeIndex(index_) +{ +} + +MergeTreeIndexGranulePtr MergeTreeIndexLegacyVectorSimilarity::createIndexGranule() const +{ + throw Exception(ErrorCodes::ILLEGAL_INDEX, "Indexes of type 'annoy' or 'usearch' are no longer supported. Please drop and recreate the index as type 'vector_similarity'"); +} + +MergeTreeIndexAggregatorPtr MergeTreeIndexLegacyVectorSimilarity::createIndexAggregator(const MergeTreeWriterSettings &) const +{ + throw Exception(ErrorCodes::ILLEGAL_INDEX, "Indexes of type 'annoy' or 'usearch' are no longer supported. Please drop and recreate the index as type 'vector_similarity'"); +} + +MergeTreeIndexConditionPtr MergeTreeIndexLegacyVectorSimilarity::createIndexCondition(const SelectQueryInfo &, ContextPtr) const +{ + throw Exception(ErrorCodes::ILLEGAL_INDEX, "Indexes of type 'annoy' or 'usearch' are no longer supported. Please drop and recreate the index as type 'vector_similarity'"); +}; + +MergeTreeIndexConditionPtr MergeTreeIndexLegacyVectorSimilarity::createIndexCondition(const ActionsDAG *, ContextPtr) const +{ + throw Exception(ErrorCodes::ILLEGAL_INDEX, "Indexes of type 'annoy' or 'usearch' are no longer supported. Please drop and recreate the index as type 'vector_similarity'"); +} + +MergeTreeIndexPtr legacyVectorSimilarityIndexCreator(const IndexDescription & index) +{ + return std::make_shared(index); +} + +void legacyVectorSimilarityIndexValidator(const IndexDescription &, bool) +{ +} + +} diff --git a/src/Storages/MergeTree/MergeTreeIndexLegacyVectorSimilarity.h b/src/Storages/MergeTree/MergeTreeIndexLegacyVectorSimilarity.h new file mode 100644 index 00000000000..1015401823d --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeIndexLegacyVectorSimilarity.h @@ -0,0 +1,26 @@ +#pragma once + +#include + +/// Walking corpse implementation for removed skipping index of type "annoy" and "usearch". +/// Its only purpose is to allow loading old tables with indexes of these types. +/// Data insertion and index usage/search will throw an exception, suggesting to migrate to "vector_similarity" indexes. + +namespace DB +{ + +class MergeTreeIndexLegacyVectorSimilarity : public IMergeTreeIndex +{ +public: + explicit MergeTreeIndexLegacyVectorSimilarity(const IndexDescription & index_); + ~MergeTreeIndexLegacyVectorSimilarity() override = default; + + MergeTreeIndexGranulePtr createIndexGranule() const override; + MergeTreeIndexAggregatorPtr createIndexAggregator(const MergeTreeWriterSettings &) const override; + MergeTreeIndexConditionPtr createIndexCondition(const SelectQueryInfo &, ContextPtr) const; + MergeTreeIndexConditionPtr createIndexCondition(const ActionsDAG *, ContextPtr) const override; + + bool isVectorSimilarityIndex() const override { return true; } +}; + +} diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp index a92df4ac72d..fa242fccbc1 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -97,7 +97,7 @@ void MergeTreeIndexGranuleSet::deserializeBinary(ReadBuffer & istr, MergeTreeInd Field field_rows; const auto & size_type = DataTypePtr(std::make_shared()); size_type->getDefaultSerialization()->deserializeBinary(field_rows, istr, {}); - size_t rows_to_read = field_rows.get(); + size_t rows_to_read = field_rows.safeGet(); if (rows_to_read == 0) return; @@ -591,7 +591,7 @@ MergeTreeIndexConditionPtr MergeTreeIndexSet::createIndexCondition( MergeTreeIndexPtr setIndexCreator(const IndexDescription & index) { - size_t max_rows = index.arguments[0].get(); + size_t max_rows = index.arguments[0].safeGet(); return std::make_shared(index, max_rows); } diff --git a/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp b/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp deleted file mode 100644 index 59a4b0fbf9c..00000000000 --- a/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp +++ /dev/null @@ -1,463 +0,0 @@ -#ifdef ENABLE_USEARCH - -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wpass-failed" - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace ProfileEvents -{ - extern const Event USearchAddCount; - extern const Event USearchAddVisitedMembers; - extern const Event USearchAddComputedDistances; - extern const Event USearchSearchCount; - extern const Event USearchSearchVisitedMembers; - extern const Event USearchSearchComputedDistances; -} - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int CANNOT_ALLOCATE_MEMORY; - extern const int ILLEGAL_COLUMN; - extern const int INCORRECT_DATA; - extern const int INCORRECT_NUMBER_OF_COLUMNS; - extern const int INCORRECT_QUERY; - extern const int LOGICAL_ERROR; - extern const int NOT_IMPLEMENTED; -} - -namespace -{ - -std::unordered_map nameToScalarKind = { - {"f64", unum::usearch::scalar_kind_t::f64_k}, - {"f32", unum::usearch::scalar_kind_t::f32_k}, - {"f16", unum::usearch::scalar_kind_t::f16_k}, - {"i8", unum::usearch::scalar_kind_t::i8_k}}; - -} - -template -USearchIndexWithSerialization::USearchIndexWithSerialization(size_t dimensions, unum::usearch::scalar_kind_t scalar_kind) - : Base(Base::make(unum::usearch::metric_punned_t(dimensions, Metric, scalar_kind))) -{ -} - -template -void USearchIndexWithSerialization::serialize(WriteBuffer & ostr) const -{ - auto callback = [&ostr](void * from, size_t n) - { - ostr.write(reinterpret_cast(from), n); - return true; - }; - - Base::save_to_stream(callback); -} - -template -void USearchIndexWithSerialization::deserialize(ReadBuffer & istr) -{ - auto callback = [&istr](void * from, size_t n) - { - istr.readStrict(reinterpret_cast(from), n); - return true; - }; - - Base::load_from_stream(callback); -} - -template -size_t USearchIndexWithSerialization::getDimensions() const -{ - return Base::dimensions(); -} - -template -MergeTreeIndexGranuleUSearch::MergeTreeIndexGranuleUSearch( - const String & index_name_, - const Block & index_sample_block_, - unum::usearch::scalar_kind_t scalar_kind_) - : index_name(index_name_) - , index_sample_block(index_sample_block_) - , scalar_kind(scalar_kind_) - , index(nullptr) -{ -} - -template -MergeTreeIndexGranuleUSearch::MergeTreeIndexGranuleUSearch( - const String & index_name_, - const Block & index_sample_block_, - unum::usearch::scalar_kind_t scalar_kind_, - USearchIndexWithSerializationPtr index_) - : index_name(index_name_) - , index_sample_block(index_sample_block_) - , scalar_kind(scalar_kind_) - , index(std::move(index_)) -{ -} - -template -void MergeTreeIndexGranuleUSearch::serializeBinary(WriteBuffer & ostr) const -{ - /// Number of dimensions is required in the index constructor, - /// so it must be written and read separately from the other part - writeIntBinary(static_cast(index->getDimensions()), ostr); // write dimension - index->serialize(ostr); -} - -template -void MergeTreeIndexGranuleUSearch::deserializeBinary(ReadBuffer & istr, MergeTreeIndexVersion /*version*/) -{ - UInt64 dimension; - readIntBinary(dimension, istr); - index = std::make_shared>(dimension, scalar_kind); - index->deserialize(istr); -} - -template -MergeTreeIndexAggregatorUSearch::MergeTreeIndexAggregatorUSearch( - const String & index_name_, - const Block & index_sample_block_, - unum::usearch::scalar_kind_t scalar_kind_) - : index_name(index_name_) - , index_sample_block(index_sample_block_) - , scalar_kind(scalar_kind_) -{ -} - -template -MergeTreeIndexGranulePtr MergeTreeIndexAggregatorUSearch::getGranuleAndReset() -{ - auto granule = std::make_shared>(index_name, index_sample_block, scalar_kind, index); - index = nullptr; - return granule; -} - -template -void MergeTreeIndexAggregatorUSearch::update(const Block & block, size_t * pos, size_t limit) -{ - if (*pos >= block.rows()) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "The provided position is not less than the number of block rows. Position: {}, Block rows: {}.", - *pos, - block.rows()); - - size_t rows_read = std::min(limit, block.rows() - *pos); - - if (rows_read == 0) - return; - - if (rows_read > std::numeric_limits::max()) - throw Exception(ErrorCodes::INCORRECT_DATA, "Index granularity is too big: more than 4B rows per index granule."); - - if (index_sample_block.columns() > 1) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected block with single column"); - - const String & index_column_name = index_sample_block.getByPosition(0).name; - ColumnPtr column_cut = block.getByName(index_column_name).column->cut(*pos, rows_read); - - if (const auto & column_array = typeid_cast(column_cut.get())) - { - const auto & column_array_data = column_array->getData(); - const auto & column_array_data_float = typeid_cast(column_array_data); - const auto & column_array_data_float_data = column_array_data_float.getData(); - - const auto & column_array_offsets = column_array->getOffsets(); - const size_t num_rows = column_array_offsets.size(); - - if (column_array->empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Array is unexpectedly empty"); - - /// The Usearch algorithm naturally assumes that the indexed vectors have dimension >= 1. This condition is violated if empty arrays - /// are INSERTed into an Usearch-indexed column or if no value was specified at all in which case the arrays take on their default - /// values which is also empty. - if (column_array->isDefaultAt(0)) - throw Exception(ErrorCodes::INCORRECT_DATA, "The arrays in column '{}' must not be empty. Did you try to INSERT default values?", index_column_name); - - /// Check all sizes are the same - size_t dimension = column_array_offsets[0]; - for (size_t i = 0; i < num_rows - 1; ++i) - if (column_array_offsets[i + 1] - column_array_offsets[i] != dimension) - throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column '{}' must have equal length", index_column_name); - - /// Also check that previously inserted blocks have the same size as this block. - /// Note that this guarantees consistency of dimension only within parts. We are unable to detect inconsistent dimensions across - /// parts - for this, a little help from the user is needed, e.g. CONSTRAINT cnstr CHECK length(array) = 42. - if (index && index->getDimensions() != dimension) - throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column '{}' must have equal length", index_column_name); - - if (!index) - index = std::make_shared>(dimension, scalar_kind); - - /// Add all rows of block - if (!index->reserve(unum::usearch::ceil2(index->size() + num_rows))) - throw Exception(ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Could not reserve memory for usearch index"); - - for (size_t current_row = 0; current_row < num_rows; ++current_row) - { - auto rc = index->add(static_cast(index->size()), &column_array_data_float_data[column_array_offsets[current_row - 1]]); - if (!rc) - throw Exception::createRuntime(ErrorCodes::INCORRECT_DATA, rc.error.release()); - - ProfileEvents::increment(ProfileEvents::USearchAddCount); - ProfileEvents::increment(ProfileEvents::USearchAddVisitedMembers, rc.visited_members); - ProfileEvents::increment(ProfileEvents::USearchAddComputedDistances, rc.computed_distances); - } - } - else if (const auto & column_tuple = typeid_cast(column_cut.get())) - { - const auto & column_tuple_columns = column_tuple->getColumns(); - std::vector> data(column_tuple->size(), std::vector()); - for (const auto & column : column_tuple_columns) - { - const auto & pod_array = typeid_cast(column.get())->getData(); - for (size_t i = 0; i < pod_array.size(); ++i) - data[i].push_back(pod_array[i]); - } - - if (data.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Tuple has 0 rows, {} rows expected", rows_read); - - if (!index) - index = std::make_shared>(data[0].size(), scalar_kind); - - if (!index->reserve(unum::usearch::ceil2(index->size() + data.size()))) - throw Exception(ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Could not reserve memory for usearch index"); - - for (const auto & item : data) - { - auto rc = index->add(static_cast(index->size()), item.data()); - if (!rc) - throw Exception::createRuntime(ErrorCodes::INCORRECT_DATA, rc.error.release()); - - ProfileEvents::increment(ProfileEvents::USearchAddCount); - ProfileEvents::increment(ProfileEvents::USearchAddVisitedMembers, rc.visited_members); - ProfileEvents::increment(ProfileEvents::USearchAddComputedDistances, rc.computed_distances); - } - } - else - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected Array or Tuple column"); - - *pos += rows_read; -} - -MergeTreeIndexConditionUSearch::MergeTreeIndexConditionUSearch( - const IndexDescription & /*index_description*/, - const SelectQueryInfo & query, - const String & distance_function_, - ContextPtr context) - : ann_condition(query, context) - , distance_function(distance_function_) -{ -} - -bool MergeTreeIndexConditionUSearch::mayBeTrueOnGranule(MergeTreeIndexGranulePtr /*idx_granule*/) const -{ - throw Exception(ErrorCodes::LOGICAL_ERROR, "mayBeTrueOnGranule is not supported for ANN skip indexes"); -} - -bool MergeTreeIndexConditionUSearch::alwaysUnknownOrTrue() const -{ - return ann_condition.alwaysUnknownOrTrue(distance_function); -} - -std::vector MergeTreeIndexConditionUSearch::getUsefulRanges(MergeTreeIndexGranulePtr idx_granule) const -{ - if (distance_function == DISTANCE_FUNCTION_L2) - return getUsefulRangesImpl(idx_granule); - else if (distance_function == DISTANCE_FUNCTION_COSINE) - return getUsefulRangesImpl(idx_granule); - std::unreachable(); -} - -template -std::vector MergeTreeIndexConditionUSearch::getUsefulRangesImpl(MergeTreeIndexGranulePtr idx_granule) const -{ - const UInt64 limit = ann_condition.getLimit(); - const UInt64 index_granularity = ann_condition.getIndexGranularity(); - const std::optional comparison_distance = ann_condition.getQueryType() == ApproximateNearestNeighborInformation::Type::Where - ? std::optional(ann_condition.getComparisonDistanceForWhereQuery()) - : std::nullopt; - - if (comparison_distance && comparison_distance.value() < 0) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to optimize query with where without distance"); - - const std::vector reference_vector = ann_condition.getReferenceVector(); - - const auto granule = std::dynamic_pointer_cast>(idx_granule); - if (granule == nullptr) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Granule has the wrong type"); - - const USearchIndexWithSerializationPtr index = granule->index; - - if (ann_condition.getDimensions() != index->dimensions()) - throw Exception(ErrorCodes::INCORRECT_QUERY, "The dimension of the space in the request ({}) " - "does not match the dimension in the index ({})", - ann_condition.getDimensions(), index->dimensions()); - - auto result = index->search(reference_vector.data(), limit); - - ProfileEvents::increment(ProfileEvents::USearchSearchCount); - ProfileEvents::increment(ProfileEvents::USearchSearchVisitedMembers, result.visited_members); - ProfileEvents::increment(ProfileEvents::USearchSearchComputedDistances, result.computed_distances); - - std::vector neighbors(result.size()); /// indexes of dots which were closest to the reference vector - std::vector distances(result.size()); - result.dump_to(neighbors.data(), distances.data()); - - std::vector granules; - granules.reserve(neighbors.size()); - for (size_t i = 0; i < neighbors.size(); ++i) - { - if (comparison_distance && distances[i] > comparison_distance) - continue; - granules.push_back(neighbors[i] / index_granularity); - } - - /// make unique - std::sort(granules.begin(), granules.end()); - granules.erase(std::unique(granules.begin(), granules.end()), granules.end()); - - return granules; -} - -MergeTreeIndexUSearch::MergeTreeIndexUSearch(const IndexDescription & index_, const String & distance_function_, unum::usearch::scalar_kind_t scalar_kind_) - : IMergeTreeIndex(index_) - , distance_function(distance_function_) - , scalar_kind(scalar_kind_) -{ -} - -MergeTreeIndexGranulePtr MergeTreeIndexUSearch::createIndexGranule() const -{ - if (distance_function == DISTANCE_FUNCTION_L2) - return std::make_shared>(index.name, index.sample_block, scalar_kind); - else if (distance_function == DISTANCE_FUNCTION_COSINE) - return std::make_shared>(index.name, index.sample_block, scalar_kind); - std::unreachable(); -} - -MergeTreeIndexAggregatorPtr MergeTreeIndexUSearch::createIndexAggregator(const MergeTreeWriterSettings & /*settings*/) const -{ - if (distance_function == DISTANCE_FUNCTION_L2) - return std::make_shared>(index.name, index.sample_block, scalar_kind); - else if (distance_function == DISTANCE_FUNCTION_COSINE) - return std::make_shared>(index.name, index.sample_block, scalar_kind); - std::unreachable(); -} - -MergeTreeIndexConditionPtr MergeTreeIndexUSearch::createIndexCondition(const SelectQueryInfo & query, ContextPtr context) const -{ - return std::make_shared(index, query, distance_function, context); -}; - -MergeTreeIndexConditionPtr MergeTreeIndexUSearch::createIndexCondition(const ActionsDAG *, ContextPtr) const -{ - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeTreeIndexAnnoy cannot be created with ActionsDAG"); -} - -MergeTreeIndexPtr usearchIndexCreator(const IndexDescription & index) -{ - static constexpr auto default_distance_function = DISTANCE_FUNCTION_L2; - String distance_function = default_distance_function; - if (!index.arguments.empty()) - distance_function = index.arguments[0].get(); - - static constexpr auto default_scalar_kind = unum::usearch::scalar_kind_t::f16_k; - auto scalar_kind = default_scalar_kind; - if (index.arguments.size() > 1) - scalar_kind = nameToScalarKind.at(index.arguments[1].get()); - - return std::make_shared(index, distance_function, scalar_kind); -} - -void usearchIndexValidator(const IndexDescription & index, bool /* attach */) -{ - /// Check number and type of USearch index arguments: - - if (index.arguments.size() > 2) - throw Exception(ErrorCodes::INCORRECT_QUERY, "USearch index must not have more than one parameters"); - - if (!index.arguments.empty() && index.arguments[0].getType() != Field::Types::String) - throw Exception(ErrorCodes::INCORRECT_QUERY, "First argument of USearch index (distance function) must be of type String"); - if (index.arguments.size() > 1 && index.arguments[1].getType() != Field::Types::String) - throw Exception(ErrorCodes::INCORRECT_QUERY, "Second argument of USearch index (scalar type) must be of type String"); - - /// Check that the index is created on a single column - - if (index.column_names.size() != 1 || index.data_types.size() != 1) - throw Exception(ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS, "USearch indexes must be created on a single column"); - - /// Check that a supported metric was passed as first argument - - if (!index.arguments.empty()) - { - String distance_name = index.arguments[0].get(); - if (distance_name != DISTANCE_FUNCTION_L2 && distance_name != DISTANCE_FUNCTION_COSINE) - throw Exception(ErrorCodes::INCORRECT_DATA, "USearch index only supports distance functions '{}' and '{}'", DISTANCE_FUNCTION_L2, DISTANCE_FUNCTION_COSINE); - } - - /// Check that a supported kind was passed as a second argument - - if (index.arguments.size() > 1 && !nameToScalarKind.contains(index.arguments[1].get())) - { - String supported_kinds; - for (const auto & [name, kind] : nameToScalarKind) - { - if (!supported_kinds.empty()) - supported_kinds += ", "; - supported_kinds += name; - } - throw Exception(ErrorCodes::INCORRECT_DATA, "Unrecognized scalar kind (second argument) for USearch index. Supported kinds are: {}", supported_kinds); - } - - /// Check data type of indexed column: - - auto throw_unsupported_underlying_column_exception = []() - { - throw Exception( - ErrorCodes::ILLEGAL_COLUMN, - "USearch can only be created on columns of type Array(Float32) and Tuple(Float32[, Float32[, ...]])"); - }; - - DataTypePtr data_type = index.sample_block.getDataTypes()[0]; - - if (const auto * data_type_array = typeid_cast(data_type.get())) - { - TypeIndex nested_type_index = data_type_array->getNestedType()->getTypeId(); - if (!WhichDataType(nested_type_index).isFloat32()) - throw_unsupported_underlying_column_exception(); - } - else if (const auto * data_type_tuple = typeid_cast(data_type.get())) - { - const DataTypes & inner_types = data_type_tuple->getElements(); - for (const auto & inner_type : inner_types) - { - TypeIndex nested_type_index = inner_type->getTypeId(); - if (!WhichDataType(nested_type_index).isFloat32()) - throw_unsupported_underlying_column_exception(); - } - } - else - throw_unsupported_underlying_column_exception(); -} - -} - -#endif diff --git a/src/Storages/MergeTree/MergeTreeIndexUSearch.h b/src/Storages/MergeTree/MergeTreeIndexUSearch.h deleted file mode 100644 index 41de94402c9..00000000000 --- a/src/Storages/MergeTree/MergeTreeIndexUSearch.h +++ /dev/null @@ -1,116 +0,0 @@ -#pragma once - -#ifdef ENABLE_USEARCH - -#include - -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wpass-failed" -#include -#pragma clang diagnostic pop - -namespace DB -{ - -using USearchImplType = unum::usearch::index_dense_gt; - -template -class USearchIndexWithSerialization : public USearchImplType -{ - using Base = USearchImplType; - -public: - USearchIndexWithSerialization(size_t dimensions, unum::usearch::scalar_kind_t scalar_kind); - void serialize(WriteBuffer & ostr) const; - void deserialize(ReadBuffer & istr); - size_t getDimensions() const; -}; - -template -using USearchIndexWithSerializationPtr = std::shared_ptr>; - - -template -struct MergeTreeIndexGranuleUSearch final : public IMergeTreeIndexGranule -{ - MergeTreeIndexGranuleUSearch(const String & index_name_, const Block & index_sample_block_, unum::usearch::scalar_kind_t scalar_kind_); - MergeTreeIndexGranuleUSearch(const String & index_name_, const Block & index_sample_block_, unum::usearch::scalar_kind_t scalar_kind_, USearchIndexWithSerializationPtr index_); - - ~MergeTreeIndexGranuleUSearch() override = default; - - void serializeBinary(WriteBuffer & ostr) const override; - void deserializeBinary(ReadBuffer & istr, MergeTreeIndexVersion version) override; - - bool empty() const override { return !index.get(); } - - const String index_name; - const Block index_sample_block; - const unum::usearch::scalar_kind_t scalar_kind; - USearchIndexWithSerializationPtr index; -}; - - -template -struct MergeTreeIndexAggregatorUSearch final : IMergeTreeIndexAggregator -{ - MergeTreeIndexAggregatorUSearch(const String & index_name_, const Block & index_sample_block, unum::usearch::scalar_kind_t scalar_kind_); - ~MergeTreeIndexAggregatorUSearch() override = default; - - bool empty() const override { return !index || index->size() == 0; } - MergeTreeIndexGranulePtr getGranuleAndReset() override; - void update(const Block & block, size_t * pos, size_t limit) override; - - const String index_name; - const Block index_sample_block; - const unum::usearch::scalar_kind_t scalar_kind; - USearchIndexWithSerializationPtr index; -}; - - -class MergeTreeIndexConditionUSearch final : public IMergeTreeIndexConditionApproximateNearestNeighbor -{ -public: - MergeTreeIndexConditionUSearch( - const IndexDescription & index_description, - const SelectQueryInfo & query, - const String & distance_function, - ContextPtr context); - - ~MergeTreeIndexConditionUSearch() override = default; - - bool alwaysUnknownOrTrue() const override; - bool mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const override; - std::vector getUsefulRanges(MergeTreeIndexGranulePtr idx_granule) const override; - -private: - template - std::vector getUsefulRangesImpl(MergeTreeIndexGranulePtr idx_granule) const; - - const ApproximateNearestNeighborCondition ann_condition; - const String distance_function; -}; - - -class MergeTreeIndexUSearch : public IMergeTreeIndex -{ -public: - MergeTreeIndexUSearch(const IndexDescription & index_, const String & distance_function_, unum::usearch::scalar_kind_t scalar_kind_); - - ~MergeTreeIndexUSearch() override = default; - - MergeTreeIndexGranulePtr createIndexGranule() const override; - MergeTreeIndexAggregatorPtr createIndexAggregator(const MergeTreeWriterSettings & settings) const override; - MergeTreeIndexConditionPtr createIndexCondition(const SelectQueryInfo & query, ContextPtr context) const; - MergeTreeIndexConditionPtr createIndexCondition(const ActionsDAG *, ContextPtr) const override; - bool isVectorSearch() const override { return true; } - -private: - const String distance_function; - const unum::usearch::scalar_kind_t scalar_kind; -}; - -} - - -#endif - diff --git a/src/Storages/MergeTree/MergeTreeIndexVectorSimilarity.cpp b/src/Storages/MergeTree/MergeTreeIndexVectorSimilarity.cpp new file mode 100644 index 00000000000..4c0da28c3c4 --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeIndexVectorSimilarity.cpp @@ -0,0 +1,541 @@ +#include + +#if USE_USEARCH + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace ProfileEvents +{ + extern const Event USearchAddCount; + extern const Event USearchAddVisitedMembers; + extern const Event USearchAddComputedDistances; + extern const Event USearchSearchCount; + extern const Event USearchSearchVisitedMembers; + extern const Event USearchSearchComputedDistances; +} + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CANNOT_ALLOCATE_MEMORY; + extern const int FORMAT_VERSION_TOO_OLD; + extern const int ILLEGAL_COLUMN; + extern const int INCORRECT_DATA; + extern const int INCORRECT_NUMBER_OF_COLUMNS; + extern const int INCORRECT_QUERY; + extern const int LOGICAL_ERROR; + extern const int NOT_IMPLEMENTED; +} + +namespace +{ + +/// The only indexing method currently supported by USearch +const std::set methods = {"hnsw"}; + +/// Maps from user-facing name to internal name +const std::unordered_map distanceFunctionToMetricKind = { + {"L2Distance", unum::usearch::metric_kind_t::l2sq_k}, + {"cosineDistance", unum::usearch::metric_kind_t::cos_k}}; + +/// Maps from user-facing name to internal name +const std::unordered_map quantizationToScalarKind = { + {"f32", unum::usearch::scalar_kind_t::f32_k}, + {"f16", unum::usearch::scalar_kind_t::f16_k}, + {"i8", unum::usearch::scalar_kind_t::i8_k}}; +/// Usearch provides more quantizations but ^^ above ones seem the only ones comprehensively supported across all distance functions. + +template +concept is_set = std::same_as>; + +template +concept is_unordered_map = std::same_as>; + +template +String joinByComma(const T & t) +{ + if constexpr (is_set) + { + return fmt::format("{}", fmt::join(t, ", ")); + } + else if constexpr (is_unordered_map) + { + String joined_keys; + for (const auto & [k, _] : t) + { + if (!joined_keys.empty()) + joined_keys += ", "; + joined_keys += k; + } + return joined_keys; + } + /// TODO once our libcxx is recent enough, replace above by + /// return fmt::format("{}", fmt::join(std::views::keys(t)), ", ")); + std::unreachable(); +} + +} + +USearchIndexWithSerialization::USearchIndexWithSerialization( + size_t dimensions, + unum::usearch::metric_kind_t metric_kind, + unum::usearch::scalar_kind_t scalar_kind, + UsearchHnswParams usearch_hnsw_params) +{ + USearchIndex::metric_t metric(dimensions, metric_kind, scalar_kind); + + unum::usearch::index_dense_config_t config(usearch_hnsw_params.m, usearch_hnsw_params.ef_construction, usearch_hnsw_params.ef_search); + config.enable_key_lookups = false; /// we don't do row-to-vector lookups + + if (auto result = USearchIndex::make(metric, config); !result) + throw Exception(ErrorCodes::INCORRECT_DATA, "Could not create vector similarity index. Error: {}", String(result.error.release())); + else + swap(result.index); +} + +void USearchIndexWithSerialization::serialize(WriteBuffer & ostr) const +{ + auto callback = [&ostr](void * from, size_t n) + { + ostr.write(reinterpret_cast(from), n); + return true; + }; + + if (auto result = Base::save_to_stream(callback); !result) + throw Exception(ErrorCodes::INCORRECT_DATA, "Could not save vector similarity index. Error: {}", String(result.error.release())); +} + +void USearchIndexWithSerialization::deserialize(ReadBuffer & istr) +{ + auto callback = [&istr](void * from, size_t n) + { + istr.readStrict(reinterpret_cast(from), n); + return true; + }; + + if (auto result = Base::load_from_stream(callback); !result) + /// See the comment in MergeTreeIndexGranuleVectorSimilarity::deserializeBinary why we throw here + throw Exception(ErrorCodes::INCORRECT_DATA, "Could not load vector similarity index. Please drop the index and create it again. Error: {}", String(result.error.release())); + + if (!try_reserve(limits())) + throw Exception(ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Could not reserve memory for usearch index"); +} + +USearchIndexWithSerialization::Statistics USearchIndexWithSerialization::getStatistics() const +{ + USearchIndex::stats_t global_stats = Base::stats(); + + Statistics statistics = { + .max_level = max_level(), + .connectivity = connectivity(), + .size = size(), + .capacity = capacity(), + .memory_usage = memory_usage(), + .bytes_per_vector = bytes_per_vector(), + .scalar_words = scalar_words(), + .nodes = global_stats.nodes, + .edges = global_stats.edges, + .max_edges = global_stats.max_edges, + .level_stats = {}}; + + for (size_t i = 0; i < statistics.max_level; ++i) + statistics.level_stats.push_back(Base::stats(i)); + + return statistics; +} + +String USearchIndexWithSerialization::Statistics::toString() const +{ + return fmt::format("max_level = {}, connectivity = {}, size = {}, capacity = {}, memory_usage = {}, bytes_per_vector = {}, scalar_words = {}, nodes = {}, edges = {}, max_edges = {}", + max_level, connectivity, size, capacity, ReadableSize(memory_usage), bytes_per_vector, scalar_words, nodes, edges, max_edges); + +} +MergeTreeIndexGranuleVectorSimilarity::MergeTreeIndexGranuleVectorSimilarity( + const String & index_name_, + const Block & index_sample_block_, + unum::usearch::metric_kind_t metric_kind_, + unum::usearch::scalar_kind_t scalar_kind_, + UsearchHnswParams usearch_hnsw_params_) + : MergeTreeIndexGranuleVectorSimilarity(index_name_, index_sample_block_, metric_kind_, scalar_kind_, usearch_hnsw_params_, nullptr) +{ +} + +MergeTreeIndexGranuleVectorSimilarity::MergeTreeIndexGranuleVectorSimilarity( + const String & index_name_, + const Block & index_sample_block_, + unum::usearch::metric_kind_t metric_kind_, + unum::usearch::scalar_kind_t scalar_kind_, + UsearchHnswParams usearch_hnsw_params_, + USearchIndexWithSerializationPtr index_) + : index_name(index_name_) + , index_sample_block(index_sample_block_) + , metric_kind(metric_kind_) + , scalar_kind(scalar_kind_) + , usearch_hnsw_params(usearch_hnsw_params_) + , index(std::move(index_)) +{ +} + +void MergeTreeIndexGranuleVectorSimilarity::serializeBinary(WriteBuffer & ostr) const +{ + LOG_TRACE(logger, "Start writing vector similarity index"); + + if (empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to write empty minmax index {}", backQuote(index_name)); + + writeIntBinary(FILE_FORMAT_VERSION, ostr); + + /// Number of dimensions is required in the index constructor, + /// so it must be written and read separately from the other part + writeIntBinary(static_cast(index->dimensions()), ostr); + + index->serialize(ostr); + + auto statistics = index->getStatistics(); + LOG_TRACE(logger, "Wrote vector similarity index: {}", statistics.toString()); +} + +void MergeTreeIndexGranuleVectorSimilarity::deserializeBinary(ReadBuffer & istr, MergeTreeIndexVersion /*version*/) +{ + LOG_TRACE(logger, "Start loading vector similarity index"); + + UInt64 file_version; + readIntBinary(file_version, istr); + if (file_version != FILE_FORMAT_VERSION) + throw Exception( + ErrorCodes::FORMAT_VERSION_TOO_OLD, + "Vector similarity index could not be loaded because its version is too old (current version: {}, persisted version: {}). Please drop the index and create it again.", + FILE_FORMAT_VERSION, file_version); + /// More fancy error handling would be: Set a flag on the index that it failed to load. During usage return all granules, i.e. + /// behave as if the index does not exist. Since format changes are expected to happen only rarely and it is "only" an index, keep it simple for now. + + UInt64 dimension; + readIntBinary(dimension, istr); + index = std::make_shared(dimension, metric_kind, scalar_kind, usearch_hnsw_params); + + index->deserialize(istr); + + auto statistics = index->getStatistics(); + LOG_TRACE(logger, "Loaded vector similarity index: {}", statistics.toString()); +} + +MergeTreeIndexAggregatorVectorSimilarity::MergeTreeIndexAggregatorVectorSimilarity( + const String & index_name_, + const Block & index_sample_block_, + unum::usearch::metric_kind_t metric_kind_, + unum::usearch::scalar_kind_t scalar_kind_, + UsearchHnswParams usearch_hnsw_params_) + : index_name(index_name_) + , index_sample_block(index_sample_block_) + , metric_kind(metric_kind_) + , scalar_kind(scalar_kind_) + , usearch_hnsw_params(usearch_hnsw_params_) +{ +} + +MergeTreeIndexGranulePtr MergeTreeIndexAggregatorVectorSimilarity::getGranuleAndReset() +{ + auto granule = std::make_shared(index_name, index_sample_block, metric_kind, scalar_kind, usearch_hnsw_params, index); + index = nullptr; + return granule; +} + +namespace +{ + +template +void updateImpl(const ColumnArray * column_array, const ColumnArray::Offsets & column_array_offsets, USearchIndexWithSerializationPtr & index, size_t dimensions, size_t rows) +{ + const auto & column_array_data = column_array->getData(); + const auto & column_array_data_float = typeid_cast(column_array_data); + const auto & column_array_data_float_data = column_array_data_float.getData(); + + /// Check all sizes are the same + for (size_t row = 0; row < rows - 1; ++row) + if (column_array_offsets[row + 1] - column_array_offsets[row] != dimensions) + throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column with vector similarity index must have equal length"); + + /// Reserving space is mandatory + if (!index->try_reserve(roundUpToPowerOfTwoOrZero(index->size() + rows))) + throw Exception(ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Could not reserve memory for vector similarity index"); + + for (size_t row = 0; row < rows; ++row) + { + if (auto result = index->add(static_cast(index->size()), &column_array_data_float_data[column_array_offsets[row - 1]]); !result) + throw Exception(ErrorCodes::INCORRECT_DATA, "Could not add data to vector similarity index. Error: {}", String(result.error.release())); + else + { + ProfileEvents::increment(ProfileEvents::USearchAddCount); + ProfileEvents::increment(ProfileEvents::USearchAddVisitedMembers, result.visited_members); + ProfileEvents::increment(ProfileEvents::USearchAddComputedDistances, result.computed_distances); + } + } +} + +} + +void MergeTreeIndexAggregatorVectorSimilarity::update(const Block & block, size_t * pos, size_t limit) +{ + if (*pos >= block.rows()) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "The provided position is not less than the number of block rows. Position: {}, Block rows: {}.", + *pos, block.rows()); + + size_t rows_read = std::min(limit, block.rows() - *pos); + + if (rows_read == 0) + return; + + if (rows_read > std::numeric_limits::max()) + throw Exception(ErrorCodes::INCORRECT_DATA, "Index granularity is too big: more than {} rows per index granule.", std::numeric_limits::max()); + + if (index_sample_block.columns() > 1) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected block with single column"); + + const String & index_column_name = index_sample_block.getByPosition(0).name; + const ColumnPtr & index_column = block.getByName(index_column_name).column; + ColumnPtr column_cut = index_column->cut(*pos, rows_read); + + const auto * column_array = typeid_cast(column_cut.get()); + if (!column_array) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected Array(Float*) column"); + + if (column_array->empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Array is unexpectedly empty"); + + /// The vector similarity algorithm naturally assumes that the indexed vectors have dimension >= 1. This condition is violated if empty arrays + /// are INSERTed into an vector-similarity-indexed column or if no value was specified at all in which case the arrays take on their default + /// values which is also empty. + if (column_array->isDefaultAt(0)) + throw Exception(ErrorCodes::INCORRECT_DATA, "The arrays in column '{}' must not be empty. Did you try to INSERT default values?", index_column_name); + + const size_t rows = column_array->size(); + + const auto & column_array_offsets = column_array->getOffsets(); + const size_t dimensions = column_array_offsets[0]; + + if (!index) + index = std::make_shared(dimensions, metric_kind, scalar_kind, usearch_hnsw_params); + + /// Also check that previously inserted blocks have the same size as this block. + /// Note that this guarantees consistency of dimension only within parts. We are unable to detect inconsistent dimensions across + /// parts - for this, a little help from the user is needed, e.g. CONSTRAINT cnstr CHECK length(array) = 42. + if (index->dimensions() != dimensions) + throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column with vector similarity index must have equal length"); + + /// We use Usearch's index_dense_t as index type which supports only 4 bio entries according to https://github.com/unum-cloud/usearch/tree/main/cpp + if (index->size() + rows > std::numeric_limits::max()) + throw Exception(ErrorCodes::INCORRECT_DATA, "Size of vector similarity index would exceed 4 billion entries"); + + DataTypePtr data_type = block.getDataTypes()[0]; + const auto * data_type_array = typeid_cast(data_type.get()); + if (!data_type_array) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected data type Array(Float*)"); + const TypeIndex nested_type_index = data_type_array->getNestedType()->getTypeId(); + + if (WhichDataType(nested_type_index).isFloat32()) + updateImpl(column_array, column_array_offsets, index, dimensions, rows); + else if (WhichDataType(nested_type_index).isFloat64()) + updateImpl(column_array, column_array_offsets, index, dimensions, rows); + else + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected data type Array(Float*)"); + + + *pos += rows_read; +} + +MergeTreeIndexConditionVectorSimilarity::MergeTreeIndexConditionVectorSimilarity( + const IndexDescription & /*index_description*/, + const SelectQueryInfo & query, + unum::usearch::metric_kind_t metric_kind_, + ContextPtr context) + : vector_similarity_condition(query, context) + , metric_kind(metric_kind_) +{ +} + +bool MergeTreeIndexConditionVectorSimilarity::mayBeTrueOnGranule(MergeTreeIndexGranulePtr) const +{ + throw Exception(ErrorCodes::LOGICAL_ERROR, "mayBeTrueOnGranule is not supported for ANN skip indexes"); +} + +bool MergeTreeIndexConditionVectorSimilarity::alwaysUnknownOrTrue() const +{ + String index_distance_function; + switch (metric_kind) + { + case unum::usearch::metric_kind_t::l2sq_k: index_distance_function = "L2Distance"; break; + case unum::usearch::metric_kind_t::cos_k: index_distance_function = "cosineDistance"; break; + default: std::unreachable(); + } + return vector_similarity_condition.alwaysUnknownOrTrue(index_distance_function); +} + +std::vector MergeTreeIndexConditionVectorSimilarity::getUsefulRanges(MergeTreeIndexGranulePtr granule_) const +{ + const UInt64 limit = vector_similarity_condition.getLimit(); + const UInt64 index_granularity = vector_similarity_condition.getIndexGranularity(); + + const auto granule = std::dynamic_pointer_cast(granule_); + if (granule == nullptr) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Granule has the wrong type"); + + const USearchIndexWithSerializationPtr index = granule->index; + + if (vector_similarity_condition.getDimensions() != index->dimensions()) + throw Exception(ErrorCodes::INCORRECT_QUERY, "The dimension of the space in the request ({}) " + "does not match the dimension in the index ({})", + vector_similarity_condition.getDimensions(), index->dimensions()); + + const std::vector reference_vector = vector_similarity_condition.getReferenceVector(); + + auto search_result = index->search(reference_vector.data(), limit); + if (!search_result) + throw Exception(ErrorCodes::INCORRECT_DATA, "Could not search in vector similarity index. Error: {}", String(search_result.error.release())); + + ProfileEvents::increment(ProfileEvents::USearchSearchCount); + ProfileEvents::increment(ProfileEvents::USearchSearchVisitedMembers, search_result.visited_members); + ProfileEvents::increment(ProfileEvents::USearchSearchComputedDistances, search_result.computed_distances); + + std::vector neighbors(search_result.size()); /// indexes of vectors which were closest to the reference vector + search_result.dump_to(neighbors.data()); + + std::vector granules; + granules.reserve(neighbors.size()); + for (auto neighbor : neighbors) + granules.push_back(neighbor / index_granularity); + + /// make unique + std::sort(granules.begin(), granules.end()); + granules.erase(std::unique(granules.begin(), granules.end()), granules.end()); + + return granules; +} + +MergeTreeIndexVectorSimilarity::MergeTreeIndexVectorSimilarity( + const IndexDescription & index_, + unum::usearch::metric_kind_t metric_kind_, + unum::usearch::scalar_kind_t scalar_kind_, + UsearchHnswParams usearch_hnsw_params_) + : IMergeTreeIndex(index_) + , metric_kind(metric_kind_) + , scalar_kind(scalar_kind_) + , usearch_hnsw_params(usearch_hnsw_params_) +{ +} + +MergeTreeIndexGranulePtr MergeTreeIndexVectorSimilarity::createIndexGranule() const +{ + return std::make_shared(index.name, index.sample_block, metric_kind, scalar_kind, usearch_hnsw_params); +} + +MergeTreeIndexAggregatorPtr MergeTreeIndexVectorSimilarity::createIndexAggregator(const MergeTreeWriterSettings & /*settings*/) const +{ + return std::make_shared(index.name, index.sample_block, metric_kind, scalar_kind, usearch_hnsw_params); +} + +MergeTreeIndexConditionPtr MergeTreeIndexVectorSimilarity::createIndexCondition(const SelectQueryInfo & query, ContextPtr context) const +{ + return std::make_shared(index, query, metric_kind, context); +}; + +MergeTreeIndexConditionPtr MergeTreeIndexVectorSimilarity::createIndexCondition(const ActionsDAG *, ContextPtr) const +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeTreeIndexAnnoy cannot be created with ActionsDAG"); +} + +MergeTreeIndexPtr vectorSimilarityIndexCreator(const IndexDescription & index) +{ + /// Default parameters: + unum::usearch::metric_kind_t metric_kind = distanceFunctionToMetricKind.at(index.arguments[1].safeGet()); + unum::usearch::scalar_kind_t scalar_kind = unum::usearch::scalar_kind_t::f32_k; + UsearchHnswParams usearch_hnsw_params; + + /// Optional parameters: + const bool has_six_args = (index.arguments.size() == 6); + if (has_six_args) + { + scalar_kind = quantizationToScalarKind.at(index.arguments[2].safeGet()); + usearch_hnsw_params = {.m = index.arguments[3].safeGet(), + .ef_construction = index.arguments[4].safeGet(), + .ef_search = index.arguments[5].safeGet()}; + } + + return std::make_shared(index, metric_kind, scalar_kind, usearch_hnsw_params); +} + +void vectorSimilarityIndexValidator(const IndexDescription & index, bool /* attach */) +{ + const bool has_two_args = (index.arguments.size() == 2); + const bool has_six_args = (index.arguments.size() == 6); + + /// Check number and type of arguments + if (!has_two_args && !has_six_args) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Vector similarity index must have two or six arguments"); + if (index.arguments[0].getType() != Field::Types::String) + throw Exception(ErrorCodes::INCORRECT_QUERY, "First argument of vector similarity index (method) must be of type String"); + if (index.arguments[1].getType() != Field::Types::String) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Second argument of vector similarity index (metric) must be of type String"); + if (has_six_args) + { + if (index.arguments[2].getType() != Field::Types::String) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Third argument of vector similarity index (quantization) must be of type String"); + if (index.arguments[3].getType() != Field::Types::UInt64) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Fourth argument of vector similarity index (M) must be of type UInt64"); + if (index.arguments[4].getType() != Field::Types::UInt64) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Fifth argument of vector similarity index (ef_construction) must be of type UInt64"); + if (index.arguments[5].getType() != Field::Types::UInt64) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Sixth argument of vector similarity index (ef_search) must be of type UInt64"); + } + + /// Check that passed arguments are supported + if (!methods.contains(index.arguments[0].safeGet())) + throw Exception(ErrorCodes::INCORRECT_DATA, "First argument (method) of vector similarity index is not supported. Supported methods are: {}", joinByComma(methods)); + if (!distanceFunctionToMetricKind.contains(index.arguments[1].safeGet())) + throw Exception(ErrorCodes::INCORRECT_DATA, "Second argument (distance function) of vector similarity index is not supported. Supported distance function are: {}", joinByComma(distanceFunctionToMetricKind)); + if (has_six_args) + { + if (!quantizationToScalarKind.contains(index.arguments[2].safeGet())) + throw Exception(ErrorCodes::INCORRECT_DATA, "Third argument (quantization) of vector similarity index is not supported. Supported quantizations are: {}", joinByComma(quantizationToScalarKind)); + + /// Call Usearch's own parameter validation method for HNSW-specific parameters + UInt64 m = index.arguments[3].safeGet(); + UInt64 ef_construction = index.arguments[4].safeGet(); + UInt64 ef_search = index.arguments[5].safeGet(); + + unum::usearch::index_dense_config_t config(m, ef_construction, ef_search); + + if (auto error = config.validate(); error) + throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid parameters passed to vector similarity index. Error: {}", String(error.release())); + } + + /// Check that the index is created on a single column + if (index.column_names.size() != 1 || index.data_types.size() != 1) + throw Exception(ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS, "Vector similarity indexes must be created on a single column"); + + /// Check that the data type is Array(Float*) + DataTypePtr data_type = index.sample_block.getDataTypes()[0]; + const auto * data_type_array = typeid_cast(data_type.get()); + if (!data_type_array) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Vector similarity indexes can only be created on columns of type Array(Float*)"); + TypeIndex nested_type_index = data_type_array->getNestedType()->getTypeId(); + if (!WhichDataType(nested_type_index).isFloat()) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Vector similarity indexes can only be created on columns of type Array(Float*)"); +} + +} + +#endif diff --git a/src/Storages/MergeTree/MergeTreeIndexVectorSimilarity.h b/src/Storages/MergeTree/MergeTreeIndexVectorSimilarity.h new file mode 100644 index 00000000000..c4c03254d2d --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeIndexVectorSimilarity.h @@ -0,0 +1,174 @@ +#pragma once + +#include "config.h" + +#if USE_USEARCH + +#include +#include +#include + +namespace DB +{ + +struct UsearchHnswParams +{ + size_t m = unum::usearch::default_connectivity(); + size_t ef_construction = unum::usearch::default_expansion_add(); + size_t ef_search = unum::usearch::default_expansion_search(); +}; + +using USearchIndex = unum::usearch::index_dense_t; + +class USearchIndexWithSerialization : public USearchIndex +{ + using Base = USearchIndex; + +public: + USearchIndexWithSerialization( + size_t dimensions, + unum::usearch::metric_kind_t metric_kind, + unum::usearch::scalar_kind_t scalar_kind, + UsearchHnswParams usearch_hnsw_params); + + void serialize(WriteBuffer & ostr) const; + void deserialize(ReadBuffer & istr); + + struct Statistics + { + size_t max_level; + size_t connectivity; + size_t size; /// number of indexed vectors + size_t capacity; /// reserved number of indexed vectors + size_t memory_usage; /// byte size (not exact) + size_t bytes_per_vector; + size_t scalar_words; + size_t nodes; + size_t edges; + size_t max_edges; + + std::vector level_stats; /// for debugging, excluded from getStatistics() + + String toString() const; + }; + + Statistics getStatistics() const; +}; + +using USearchIndexWithSerializationPtr = std::shared_ptr; + + +struct MergeTreeIndexGranuleVectorSimilarity final : public IMergeTreeIndexGranule +{ + MergeTreeIndexGranuleVectorSimilarity( + const String & index_name_, + const Block & index_sample_block_, + unum::usearch::metric_kind_t metric_kind_, + unum::usearch::scalar_kind_t scalar_kind_, + UsearchHnswParams usearch_hnsw_params_); + + MergeTreeIndexGranuleVectorSimilarity( + const String & index_name_, + const Block & index_sample_block_, + unum::usearch::metric_kind_t metric_kind_, + unum::usearch::scalar_kind_t scalar_kind_, + UsearchHnswParams usearch_hnsw_params_, + USearchIndexWithSerializationPtr index_); + + ~MergeTreeIndexGranuleVectorSimilarity() override = default; + + void serializeBinary(WriteBuffer & ostr) const override; + void deserializeBinary(ReadBuffer & istr, MergeTreeIndexVersion version) override; + + bool empty() const override { return !index || index->size() == 0; } + + const String index_name; + const Block index_sample_block; + const unum::usearch::metric_kind_t metric_kind; + const unum::usearch::scalar_kind_t scalar_kind; + const UsearchHnswParams usearch_hnsw_params; + USearchIndexWithSerializationPtr index; + + LoggerPtr logger = getLogger("VectorSimilarityIndex"); + +private: + /// The version of the persistence format of USearch index. Increment whenever you change the format. + /// Note: USearch prefixes the serialized data with its own version header. We can't rely on that because 1. the index in ClickHouse + /// is (at least in theory) agnostic of specific vector search libraries, and 2. additional data (e.g. the number of dimensions) + /// outside USearch exists which we should version separately. + static constexpr UInt64 FILE_FORMAT_VERSION = 1; +}; + + +struct MergeTreeIndexAggregatorVectorSimilarity final : IMergeTreeIndexAggregator +{ + MergeTreeIndexAggregatorVectorSimilarity( + const String & index_name_, + const Block & index_sample_block, + unum::usearch::metric_kind_t metric_kind_, + unum::usearch::scalar_kind_t scalar_kind_, + UsearchHnswParams usearch_hnsw_params_); + + ~MergeTreeIndexAggregatorVectorSimilarity() override = default; + + bool empty() const override { return !index || index->size() == 0; } + MergeTreeIndexGranulePtr getGranuleAndReset() override; + void update(const Block & block, size_t * pos, size_t limit) override; + + const String index_name; + const Block index_sample_block; + const unum::usearch::metric_kind_t metric_kind; + const unum::usearch::scalar_kind_t scalar_kind; + const UsearchHnswParams usearch_hnsw_params; + USearchIndexWithSerializationPtr index; +}; + + +class MergeTreeIndexConditionVectorSimilarity final : public IMergeTreeIndexCondition +{ +public: + MergeTreeIndexConditionVectorSimilarity( + const IndexDescription & index_description, + const SelectQueryInfo & query, + unum::usearch::metric_kind_t metric_kind_, + ContextPtr context); + + ~MergeTreeIndexConditionVectorSimilarity() override = default; + + bool alwaysUnknownOrTrue() const override; + bool mayBeTrueOnGranule(MergeTreeIndexGranulePtr granule) const override; + std::vector getUsefulRanges(MergeTreeIndexGranulePtr granule) const override; + +private: + const VectorSimilarityCondition vector_similarity_condition; + const unum::usearch::metric_kind_t metric_kind; +}; + + +class MergeTreeIndexVectorSimilarity : public IMergeTreeIndex +{ +public: + MergeTreeIndexVectorSimilarity( + const IndexDescription & index_, + unum::usearch::metric_kind_t metric_kind_, + unum::usearch::scalar_kind_t scalar_kind_, + UsearchHnswParams usearch_hnsw_params_); + + ~MergeTreeIndexVectorSimilarity() override = default; + + MergeTreeIndexGranulePtr createIndexGranule() const override; + MergeTreeIndexAggregatorPtr createIndexAggregator(const MergeTreeWriterSettings & settings) const override; + MergeTreeIndexConditionPtr createIndexCondition(const SelectQueryInfo & query, ContextPtr context) const; + MergeTreeIndexConditionPtr createIndexCondition(const ActionsDAG *, ContextPtr) const override; + bool isVectorSimilarityIndex() const override { return true; } + +private: + const unum::usearch::metric_kind_t metric_kind; + const unum::usearch::scalar_kind_t scalar_kind; + const UsearchHnswParams usearch_hnsw_params; +}; + +} + + +#endif diff --git a/src/Storages/MergeTree/MergeTreeIndices.cpp b/src/Storages/MergeTree/MergeTreeIndices.cpp index bded961db8e..d2fc0e84b56 100644 --- a/src/Storages/MergeTree/MergeTreeIndices.cpp +++ b/src/Storages/MergeTree/MergeTreeIndices.cpp @@ -127,15 +127,21 @@ MergeTreeIndexFactory::MergeTreeIndexFactory() registerCreator("hypothesis", hypothesisIndexCreator); registerValidator("hypothesis", hypothesisIndexValidator); -#ifdef ENABLE_ANNOY - registerCreator("annoy", annoyIndexCreator); - registerValidator("annoy", annoyIndexValidator); -#endif -#ifdef ENABLE_USEARCH - registerCreator("usearch", usearchIndexCreator); - registerValidator("usearch", usearchIndexValidator); +#if USE_USEARCH + registerCreator("vector_similarity", vectorSimilarityIndexCreator); + registerValidator("vector_similarity", vectorSimilarityIndexValidator); #endif + /// ------ + /// TODO: remove this block at the end of 2024. + /// Index types 'annoy' and 'usearch' are no longer supported as of June 2024. Their successor is index type 'vector_similarity'. + /// To support loading tables with old indexes during a transition period, register dummy indexes which allow load/attaching but + /// throw an exception when the user attempts to use them. + registerCreator("annoy", legacyVectorSimilarityIndexCreator); + registerValidator("annoy", legacyVectorSimilarityIndexValidator); + registerCreator("usearch", legacyVectorSimilarityIndexCreator); + registerValidator("usearch", legacyVectorSimilarityIndexValidator); + /// ------ registerCreator("inverted", fullTextIndexCreator); registerValidator("inverted", fullTextIndexValidator); diff --git a/src/Storages/MergeTree/MergeTreeIndices.h b/src/Storages/MergeTree/MergeTreeIndices.h index 1be73e1c811..c52d7ffe131 100644 --- a/src/Storages/MergeTree/MergeTreeIndices.h +++ b/src/Storages/MergeTree/MergeTreeIndices.h @@ -15,6 +15,7 @@ #include #include +#include "config.h" constexpr auto INDEX_FILE_PREFIX = "skp_idx_"; @@ -92,6 +93,13 @@ public: virtual bool alwaysUnknownOrTrue() const = 0; virtual bool mayBeTrueOnGranule(MergeTreeIndexGranulePtr granule) const = 0; + + /// Special stuff for vector similarity indexes + /// - Returns vector of indexes of ranges in granule which are useful for query. + virtual std::vector getUsefulRanges(MergeTreeIndexGranulePtr) const + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented for non-vector-similarity indexes."); + } }; using MergeTreeIndexConditionPtr = std::shared_ptr; @@ -169,7 +177,7 @@ struct IMergeTreeIndex virtual MergeTreeIndexConditionPtr createIndexCondition( const ActionsDAG * filter_actions_dag, ContextPtr context) const = 0; - virtual bool isVectorSearch() const { return false; } + virtual bool isVectorSimilarityIndex() const { return false; } virtual MergeTreeIndexMergedConditionPtr createIndexMergedCondition( const SelectQueryInfo & /*query_info*/, StorageMetadataPtr /*storage_metadata*/) const @@ -230,17 +238,15 @@ void bloomFilterIndexValidator(const IndexDescription & index, bool attach); MergeTreeIndexPtr hypothesisIndexCreator(const IndexDescription & index); void hypothesisIndexValidator(const IndexDescription & index, bool attach); -#ifdef ENABLE_ANNOY -MergeTreeIndexPtr annoyIndexCreator(const IndexDescription & index); -void annoyIndexValidator(const IndexDescription & index, bool attach); +#if USE_USEARCH +MergeTreeIndexPtr vectorSimilarityIndexCreator(const IndexDescription & index); +void vectorSimilarityIndexValidator(const IndexDescription & index, bool attach); #endif -#ifdef ENABLE_USEARCH -MergeTreeIndexPtr usearchIndexCreator(const IndexDescription& index); -void usearchIndexValidator(const IndexDescription& index, bool attach); -#endif +MergeTreeIndexPtr legacyVectorSimilarityIndexCreator(const IndexDescription & index); +void legacyVectorSimilarityIndexValidator(const IndexDescription & index, bool attach); -MergeTreeIndexPtr fullTextIndexCreator(const IndexDescription& index); -void fullTextIndexValidator(const IndexDescription& index, bool attach); +MergeTreeIndexPtr fullTextIndexCreator(const IndexDescription & index); +void fullTextIndexValidator(const IndexDescription & index, bool attach); } diff --git a/src/Storages/MergeTree/MergeTreePartition.cpp b/src/Storages/MergeTree/MergeTreePartition.cpp index b240f80ee13..5b5bc244f92 100644 --- a/src/Storages/MergeTree/MergeTreePartition.cpp +++ b/src/Storages/MergeTree/MergeTreePartition.cpp @@ -241,7 +241,7 @@ String MergeTreePartition::getID(const Block & partition_key_sample) const if (typeid_cast(partition_key_sample.getByPosition(i).type.get())) result += toString(DateLUT::serverTimezoneInstance().toNumYYYYMMDD(DayNum(value[i].safeGet()))); else if (typeid_cast(partition_key_sample.getByPosition(i).type.get())) - result += toString(value[i].get().toUnderType()); + result += toString(value[i].safeGet().toUnderType()); else result += applyVisitor(to_string_visitor, value[i]); diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp index a9b77fb6c03..7081eb716f5 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp @@ -395,6 +395,10 @@ void MergeTreePrefetchedReadPool::fillPerThreadTasks(size_t threads, size_t sum_ part_stat.prefetch_step_marks = std::max(part_stat.prefetch_step_marks, per_part_infos[i]->min_marks_per_task); + if (part_stat.prefetch_step_marks == 0) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "Chosen number of marks to read is zero (likely because of weird interference of settings)"); + LOG_DEBUG( log, "Part: {}, sum_marks: {}, approx mark size: {}, prefetch_step_bytes: {}, prefetch_step_marks: {}, (ranges: {})", diff --git a/src/Storages/MergeTree/MergeTreeReadPool.cpp b/src/Storages/MergeTree/MergeTreeReadPool.cpp index cc321cd5a4d..23c314e48f5 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -24,6 +24,7 @@ namespace ErrorCodes { extern const int CANNOT_SCHEDULE_TASK; extern const int LOGICAL_ERROR; +extern const int BAD_ARGUMENTS; } MergeTreeReadPool::MergeTreeReadPool( @@ -235,6 +236,10 @@ void MergeTreeReadPool::fillPerThreadInfo(size_t threads, size_t sum_marks) const auto part_idx = current_parts.back().part_idx; const auto min_marks_per_task = per_part_infos[part_idx]->min_marks_per_task; + if (min_marks_per_task == 0) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "Chosen number of marks to read is zero (likely because of weird interference of settings)"); + /// Do not get too few rows from part. if (marks_in_part >= min_marks_per_task && need_marks < min_marks_per_task) need_marks = min_marks_per_task; diff --git a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp index 6d2560bc9c7..95a10454f9e 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp @@ -85,6 +85,7 @@ static size_t calculateMinMarksPerTask( min_marks_per_task = heuristic_min_marks; } } + LOG_TEST(&Poco::Logger::get("MergeTreeReadPoolBase"), "Will use min_marks_per_task={}", min_marks_per_task); return min_marks_per_task; } diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp index 33eaf5a49bd..d23072771f2 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp @@ -8,6 +8,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; } MergeTreeReadPoolParallelReplicas::MergeTreeReadPoolParallelReplicas( @@ -38,6 +39,10 @@ MergeTreeReadPoolParallelReplicas::MergeTreeReadPoolParallelReplicas( for (const auto & info : per_part_infos) min_marks_per_task = std::max(min_marks_per_task, info->min_marks_per_task); + if (min_marks_per_task == 0) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "Chosen number of marks to read is zero (likely because of weird interference of settings)"); + extension.all_callback( InitialAllRangesAnnouncement(coordination_mode, parts_ranges.getDescriptions(), extension.number_of_current_replica)); } diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp index 6b5cf978423..42ffc4304b2 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp @@ -6,6 +6,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; } MergeTreeReadPoolParallelReplicasInOrder::MergeTreeReadPoolParallelReplicasInOrder( @@ -37,6 +38,10 @@ MergeTreeReadPoolParallelReplicasInOrder::MergeTreeReadPoolParallelReplicasInOrd for (const auto & info : per_part_infos) min_marks_per_task = std::max(min_marks_per_task, info->min_marks_per_task); + if (min_marks_per_task == 0) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "Chosen number of marks to read is zero (likely because of weird interference of settings)"); + for (const auto & part : parts_ranges) request.push_back({part.data_part->info, MarkRanges{}}); diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp index 69dc2e4b2bb..7451374070c 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp @@ -234,7 +234,7 @@ void MergeTreeReaderCompact::readPrefix( serialization = getSerializationInPart(name_and_type); deserialize_settings.getter = buffer_getter; - deserialize_settings.dynamic_read_statistics = true; + deserialize_settings.object_and_dynamic_read_statistics = true; serialization->deserializeBinaryBulkStatePrefix(deserialize_settings, deserialize_binary_bulk_state_map[name_and_type.name], nullptr); } catch (Exception & e) diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index b6882fdced9..898bf5a2933 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -213,6 +213,10 @@ void MergeTreeReaderWide::addStreams( ISerialization::StreamCallback callback = [&] (const ISerialization::SubstreamPath & substream_path) { + /// Don't create streams for ephemeral subcolumns that don't store any real data. + if (ISerialization::isEphemeralSubcolumn(substream_path, substream_path.size())) + return; + auto stream_name = IMergeTreeDataPart::getStreamNameForColumn(name_and_type, substream_path, data_part_info_for_read->getChecksums()); /** If data file is missing then we will not try to open it. @@ -326,7 +330,7 @@ void MergeTreeReaderWide::deserializePrefix( if (!deserialize_binary_bulk_state_map.contains(name)) { ISerialization::DeserializeBinaryBulkSettings deserialize_settings; - deserialize_settings.dynamic_read_statistics = true; + deserialize_settings.object_and_dynamic_read_statistics = true; deserialize_settings.getter = [&](const ISerialization::SubstreamPath & substream_path) { return getStream(/* seek_to_start = */true, substream_path, data_part_info_for_read->getChecksums(), name_and_type, 0, /* seek_to_mark = */false, current_task_last_mark, cache); @@ -348,6 +352,10 @@ void MergeTreeReaderWide::prefetchForColumn( deserializePrefix(serialization, name_and_type, current_task_last_mark, cache, deserialize_states_cache); auto callback = [&](const ISerialization::SubstreamPath & substream_path) { + /// Skip ephemeral subcolumns that don't store any real data. + if (ISerialization::isEphemeralSubcolumn(substream_path, substream_path.size())) + return; + auto stream_name = IMergeTreeDataPart::getStreamNameForColumn(name_and_type, substream_path, data_part_info_for_read->getChecksums()); if (stream_name && !prefetched_streams.contains(*stream_name)) diff --git a/src/Storages/MergeTree/MergeTreeSettings.cpp b/src/Storages/MergeTree/MergeTreeSettings.cpp index c968ad84936..dabb6991b0b 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.cpp +++ b/src/Storages/MergeTree/MergeTreeSettings.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include @@ -59,15 +59,19 @@ void MergeTreeSettings::loadFromQuery(ASTStorage & storage_def, ContextPtr conte CustomType custom; if (name == "disk") { + ASTPtr value_as_custom_ast = nullptr; if (value.tryGet(custom) && 0 == strcmp(custom.getTypeName(), "AST")) + value_as_custom_ast = dynamic_cast(custom.getImpl()).ast; + + if (value_as_custom_ast && isDiskFunction(value_as_custom_ast)) { - auto ast = dynamic_cast(custom.getImpl()).ast; - if (ast && isDiskFunction(ast)) - { - auto disk_name = getOrCreateDiskFromDiskAST(ast, context, is_attach); - LOG_TRACE(getLogger("MergeTreeSettings"), "Created custom disk {}", disk_name); - value = disk_name; - } + auto disk_name = DiskFomAST::createCustomDisk(value_as_custom_ast, context, is_attach); + LOG_DEBUG(getLogger("MergeTreeSettings"), "Created custom disk {}", disk_name); + value = disk_name; + } + else + { + DiskFomAST::ensureDiskIsNotCustom(value.safeGet(), context); } if (has("storage_policy")) diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 5ba1988cc5d..0769b60dc6b 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -67,8 +67,8 @@ struct Settings; M(Bool, fsync_part_directory, false, "Do fsync for part directory after all part operations (writes, renames, etc.).", 0) \ M(UInt64, non_replicated_deduplication_window, 0, "How many last blocks of hashes should be kept on disk (0 - disabled).", 0) \ M(UInt64, max_parts_to_merge_at_once, 100, "Max amount of parts which can be merged at once (0 - disabled). Doesn't affect OPTIMIZE FINAL query.", 0) \ - M(UInt64, merge_selecting_sleep_ms, 5000, "Maximum sleep time for merge selecting, a lower setting will trigger selecting tasks in background_schedule_pool frequently which result in large amount of requests to zookeeper in large-scale clusters", 0) \ - M(UInt64, max_merge_selecting_sleep_ms, 60000, "Maximum sleep time for merge selecting, a lower setting will trigger selecting tasks in background_schedule_pool frequently which result in large amount of requests to zookeeper in large-scale clusters", 0) \ + M(UInt64, merge_selecting_sleep_ms, 5000, "Minimum time to wait before trying to select parts to merge again after no parts were selected. A lower setting will trigger selecting tasks in background_schedule_pool frequently which result in large amount of requests to zookeeper in large-scale clusters", 0) \ + M(UInt64, max_merge_selecting_sleep_ms, 60000, "Maximum time to wait before trying to select parts to merge again after no parts were selected. A lower setting will trigger selecting tasks in background_schedule_pool frequently which result in large amount of requests to zookeeper in large-scale clusters", 0) \ M(Float, merge_selecting_sleep_slowdown_factor, 1.2f, "The sleep time for merge selecting task is multiplied by this factor when there's nothing to merge and divided when a merge was assigned", 0) \ M(UInt64, merge_tree_clear_old_temporary_directories_interval_seconds, 60, "The period of executing the clear old temporary directories operation in background.", 0) \ M(UInt64, merge_tree_clear_old_parts_interval_seconds, 1, "The period of executing the clear old parts operation in background.", 0) \ @@ -84,6 +84,7 @@ struct Settings; M(Bool, exclude_deleted_rows_for_part_size_in_merge, false, "Use an estimated source part size (excluding lightweight deleted rows) when selecting parts to merge", 0) \ M(String, merge_workload, "", "Name of workload to be used to access resources for merges", 0) \ M(String, mutation_workload, "", "Name of workload to be used to access resources for mutations", 0) \ + M(Milliseconds, background_task_preferred_step_execution_time_ms, 50, "Target time to execution of one step of merge or mutation. Can be exceeded if one step takes longer time", 0) \ \ /** Inserts settings. */ \ M(UInt64, parts_to_delay_insert, 1000, "If table contains at least that many active parts in single partition, artificially slow down insert into table. Disabled if set to 0", 0) \ @@ -215,6 +216,7 @@ struct Settings; M(Float, primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns, 0.9f, "If the value of a column of the primary key in data part changes at least in this ratio of times, skip loading next columns in memory. This allows to save memory usage by not loading useless columns of the primary key.", 0) \ /** Projection settings. */ \ M(UInt64, max_projections, 25, "The maximum number of merge tree projections.", 0) \ + M(LightweightMutationProjectionMode, lightweight_mutation_projection_mode, LightweightMutationProjectionMode::THROW, "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop projections of this table's relevant parts, or rebuild the projections.", 0) \ M(DeduplicateMergeProjectionMode, deduplicate_merge_projection_mode, DeduplicateMergeProjectionMode::THROW, "Whether to allow create projection for the table with non-classic MergeTree, if allowed, what is the action when merge, drop or rebuild.", 0) \ #define MAKE_OBSOLETE_MERGE_TREE_SETTING(M, TYPE, NAME, DEFAULT) \ diff --git a/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp b/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp index 36ff6c0a4bd..9c82817e8cb 100644 --- a/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp +++ b/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp @@ -152,23 +152,15 @@ const ActionsDAG::Node & addFunction( const ActionsDAG::Node & addCast( const ActionsDAGPtr & dag, const ActionsDAG::Node & node_to_cast, - const String & type_name, + const DataTypePtr & to_type, OriginalToNewNodeMap & node_remap) { - if (node_to_cast.result_type->getName() == type_name) + if (!node_to_cast.result_type->equals(*to_type)) return node_to_cast; - Field cast_type_constant_value(type_name); - - ColumnWithTypeAndName column; - column.column = DataTypeString().createColumnConst(0, cast_type_constant_value); - column.type = std::make_shared(); - - const auto * cast_type_constant_node = &dag->addColumn(std::move(column)); - ActionsDAG::NodeRawConstPtrs children = {&node_to_cast, cast_type_constant_node}; - FunctionOverloadResolverPtr func_builder_cast = createInternalCastOverloadResolver(CastType::nonAccurate, {}); - - return addFunction(dag, func_builder_cast, std::move(children), node_remap); + const auto & new_node = dag->addCast(node_to_cast, to_type, {}); + node_remap[new_node.result_name] = {dag.get(), &new_node}; + return new_node; } /// Normalizes the filter node by adding AND with a constant true. @@ -332,7 +324,7 @@ bool tryBuildPrewhereSteps(PrewhereInfoPtr prewhere_info, const ExpressionAction /// Build AND(last_step_result_node, true) const auto & and_node = addAndTrue(last_step_dag, *last_step_result_node_info.node, node_remap); /// Build CAST(and_node, type of PREWHERE column) - const auto & cast_node = addCast(last_step_dag, and_node, output->result_type->getName(), node_remap); + const auto & cast_node = addCast(last_step_dag, and_node, output->result_type, node_remap); /// Add alias for the result with the name of the PREWHERE column const auto & prewhere_result_node = last_step_dag->addAlias(cast_node, output->result_name); last_step_dag->addOrReplaceInOutputs(prewhere_result_node); diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index 43c40dee77d..f0c26c302e1 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -222,17 +222,17 @@ static bool isConditionGood(const RPNBuilderTreeNode & condition, const NameSet /// check the value with respect to threshold if (type == Field::Types::UInt64) { - const auto value = output_value.get(); + const auto value = output_value.safeGet(); return value > threshold; } else if (type == Field::Types::Int64) { - const auto value = output_value.get(); + const auto value = output_value.safeGet(); return value < -threshold || threshold < value; } else if (type == Field::Types::Float64) { - const auto value = output_value.get(); + const auto value = output_value.safeGet(); return value < -threshold || threshold < value; } diff --git a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp index 73084f487b9..56f68fd265a 100644 --- a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp @@ -254,6 +254,7 @@ bool MutateFromLogEntryTask::finalize(ReplicatedMergeMutateTaskBase::PartLogWrit LOG_ERROR(log, "{}. Data after mutation is not byte-identical to data on another replicas. " "We will download merged part from replica to force byte-identical result.", getCurrentExceptionMessage(false)); + mutate_task->updateProfileEvents(); write_part_log(ExecutionStatus::fromCurrentException("", true)); if (storage.getSettings()->detach_not_byte_identical_parts) @@ -281,6 +282,7 @@ bool MutateFromLogEntryTask::finalize(ReplicatedMergeMutateTaskBase::PartLogWrit */ finish_callback = [storage_ptr = &storage]() { storage_ptr->merge_selecting_task->schedule(); }; ProfileEvents::increment(ProfileEvents::ReplicatedPartMutations); + mutate_task->updateProfileEvents(); write_part_log({}); return true; diff --git a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp index 9aec074deae..10461eb5942 100644 --- a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp @@ -102,6 +102,7 @@ bool MutatePlainMergeTreeTask::executeStep() transaction.commit(); storage.updateMutationEntriesErrors(future_part, true, ""); + mutate_task->updateProfileEvents(); write_part_log({}); state = State::NEED_FINISH; @@ -114,6 +115,7 @@ bool MutatePlainMergeTreeTask::executeStep() PreformattedMessage exception_message = getCurrentExceptionMessageAndPattern(/* with_stacktrace */ false); LOG_ERROR(getLogger("MutatePlainMergeTreeTask"), exception_message); storage.updateMutationEntriesErrors(future_part, false, exception_message.text); + mutate_task->updateProfileEvents(); write_part_log(ExecutionStatus::fromCurrentException("", true)); tryLogCurrentException(__PRETTY_FUNCTION__); return false; diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 9a775db73e2..55a4947832e 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -38,7 +38,13 @@ namespace ProfileEvents { -extern const Event MutateTaskProjectionsCalculationMicroseconds; + extern const Event MutationTotalParts; + extern const Event MutationUntouchedParts; + extern const Event MutationTotalMilliseconds; + extern const Event MutationExecuteMilliseconds; + extern const Event MutationAllPartColumns; + extern const Event MutationSomePartColumns; + extern const Event MutateTaskProjectionsCalculationMicroseconds; } namespace CurrentMetrics @@ -546,7 +552,7 @@ static std::set getStatisticsToRecalculate(const StorageMet { if (!col_desc.statistics.empty() && materialized_stats.contains(col_desc.name)) { - stats_to_recalc.insert(stats_factory.get(col_desc.statistics)); + stats_to_recalc.insert(stats_factory.get(col_desc)); } } return stats_to_recalc; @@ -659,7 +665,7 @@ static NameSet collectFilesToSkip( const Block & updated_header, const std::set & indices_to_recalc, const String & mrk_extension, - const std::set & projections_to_recalc, + const std::set & projections_to_skip, const std::set & stats_to_recalc) { NameSet files_to_skip = source_part->getFileNamesWithoutChecksums(); @@ -684,7 +690,7 @@ static NameSet collectFilesToSkip( } } - for (const auto & projection : projections_to_recalc) + for (const auto & projection : projections_to_skip) files_to_skip.insert(projection->getDirectoryName()); for (const auto & stat : stats_to_recalc) @@ -1046,6 +1052,7 @@ struct MutationContext /// Whether we need to count lightweight delete rows in this mutation bool count_lightweight_deleted_rows; + UInt64 execute_elapsed_ns = 0; }; using MutationContextPtr = std::shared_ptr; @@ -1250,6 +1257,8 @@ public: private: void prepare(); bool mutateOriginalPartAndPrepareProjections(); + void writeTempProjectionPart(size_t projection_idx, Chunk chunk); + void finalizeTempProjections(); bool iterateThroughAllProjections(); void constructTaskForProjectionPartsMerge(); void finalize(); @@ -1300,10 +1309,22 @@ void PartMergerWriter::prepare() bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { - Block cur_block; - Block projection_header; - if (MutationHelpers::checkOperationIsNotCanceled(*ctx->merges_blocker, ctx->mutate_entry) && ctx->mutating_executor->pull(cur_block)) + Stopwatch watch(CLOCK_MONOTONIC_COARSE); + UInt64 step_time_ms = ctx->data->getSettings()->background_task_preferred_step_execution_time_ms.totalMilliseconds(); + + do { + Block cur_block; + Block projection_header; + + MutationHelpers::checkOperationIsNotCanceled(*ctx->merges_blocker, ctx->mutate_entry); + + if (!ctx->mutating_executor->pull(cur_block)) + { + finalizeTempProjections(); + return false; + } + if (ctx->minmax_idx) ctx->minmax_idx->update(cur_block, MergeTreeData::getMinMaxColumnsNames(ctx->metadata_snapshot->getPartitionKey())); @@ -1315,46 +1336,56 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() for (size_t i = 0, size = ctx->projections_to_build.size(); i < size; ++i) { - const auto & projection = *ctx->projections_to_build[i]; + Chunk squashed_chunk; - ProfileEventTimeIncrement watch(ProfileEvents::MutateTaskProjectionsCalculationMicroseconds); - Block block_to_squash = projection.calculate(cur_block, ctx->context); - projection_squashes[i].setHeader(block_to_squash.cloneEmpty()); - - Chunk squashed_chunk = Squashing::squash(projection_squashes[i].add({block_to_squash.getColumns(), block_to_squash.rows()})); - if (squashed_chunk) { - auto result = projection_squashes[i].getHeader().cloneWithColumns(squashed_chunk.detachColumns()); - auto tmp_part = MergeTreeDataWriter::writeTempProjectionPart( - *ctx->data, ctx->log, result, projection, ctx->new_data_part.get(), ++block_num); - tmp_part.finalize(); - tmp_part.part->getDataPartStorage().commitTransaction(); - projection_parts[projection.name].emplace_back(std::move(tmp_part.part)); + ProfileEventTimeIncrement projection_watch(ProfileEvents::MutateTaskProjectionsCalculationMicroseconds); + Block block_to_squash = ctx->projections_to_build[i]->calculate(cur_block, ctx->context); + + projection_squashes[i].setHeader(block_to_squash.cloneEmpty()); + squashed_chunk = Squashing::squash(projection_squashes[i].add({block_to_squash.getColumns(), block_to_squash.rows()})); } + + if (squashed_chunk) + writeTempProjectionPart(i, std::move(squashed_chunk)); } (*ctx->mutate_entry)->rows_written += cur_block.rows(); (*ctx->mutate_entry)->bytes_written_uncompressed += cur_block.bytes(); + } while (watch.elapsedMilliseconds() < step_time_ms); - /// Need execute again - return true; - } + /// Need execute again + return true; +} +void PartMergerWriter::writeTempProjectionPart(size_t projection_idx, Chunk chunk) +{ + const auto & projection = *ctx->projections_to_build[projection_idx]; + const auto & projection_plan = projection_squashes[projection_idx]; + + auto result = projection_plan.getHeader().cloneWithColumns(chunk.detachColumns()); + + auto tmp_part = MergeTreeDataWriter::writeTempProjectionPart( + *ctx->data, + ctx->log, + result, + projection, + ctx->new_data_part.get(), + ++block_num); + + tmp_part.finalize(); + tmp_part.part->getDataPartStorage().commitTransaction(); + projection_parts[projection.name].emplace_back(std::move(tmp_part.part)); +} + +void PartMergerWriter::finalizeTempProjections() +{ // Write the last block for (size_t i = 0, size = ctx->projections_to_build.size(); i < size; ++i) { - const auto & projection = *ctx->projections_to_build[i]; - auto & projection_squash_plan = projection_squashes[i]; - auto squashed_chunk = Squashing::squash(projection_squash_plan.flush()); + auto squashed_chunk = Squashing::squash(projection_squashes[i].flush()); if (squashed_chunk) - { - auto result = projection_squash_plan.getHeader().cloneWithColumns(squashed_chunk.detachColumns()); - auto temp_part = MergeTreeDataWriter::writeTempProjectionPart( - *ctx->data, ctx->log, result, projection, ctx->new_data_part.get(), ++block_num); - temp_part.finalize(); - temp_part.part->getDataPartStorage().commitTransaction(); - projection_parts[projection.name].emplace_back(std::move(temp_part.part)); - } + writeTempProjectionPart(i, std::move(squashed_chunk)); } projection_parts_iterator = std::make_move_iterator(projection_parts.begin()); @@ -1362,12 +1393,8 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() /// Maybe there are no projections ? if (projection_parts_iterator != std::make_move_iterator(projection_parts.end())) constructTaskForProjectionPartsMerge(); - - /// Let's move on to the next stage - return false; } - void PartMergerWriter::constructTaskForProjectionPartsMerge() { auto && [name, parts] = *projection_parts_iterator; @@ -1530,7 +1557,7 @@ private: if (ctx->materialized_statistics.contains(col.name)) { - stats_to_rewrite.push_back(MergeTreeStatisticsFactory::instance().get(col.statistics)); + stats_to_rewrite.push_back(MergeTreeStatisticsFactory::instance().get(col)); } else { @@ -1554,6 +1581,10 @@ private: removed_projections.insert(command.column_name); } + bool lightweight_delete_mode = ctx->updated_header.has(RowExistsColumn::name); + bool lightweight_delete_drop = lightweight_delete_mode + && ctx->data->getSettings()->lightweight_mutation_projection_mode == LightweightMutationProjectionMode::DROP; + const auto & projections = ctx->metadata_snapshot->getProjections(); for (const auto & projection : projections) { @@ -1561,10 +1592,11 @@ private: continue; bool need_recalculate = - ctx->materialized_projections.contains(projection.name) + (ctx->materialized_projections.contains(projection.name) || (!is_full_part_storage && ctx->source_part->hasProjection(projection.name) - && !ctx->source_part->hasBrokenProjection(projection.name)); + && !ctx->source_part->hasBrokenProjection(projection.name))) + && !lightweight_delete_drop; if (need_recalculate) { @@ -1572,7 +1604,7 @@ private: } else { - if (ctx->source_part->checksums.has(projection.getDirectoryName())) + if (!lightweight_delete_mode && ctx->source_part->checksums.has(projection.getDirectoryName())) entries_to_hardlink.insert(projection.getDirectoryName()); } } @@ -2017,6 +2049,9 @@ MutateTask::MutateTask( bool MutateTask::execute() { + Stopwatch watch; + SCOPE_EXIT({ ctx->execute_elapsed_ns += watch.elapsedNanoseconds(); }); + switch (state) { case State::NEED_PREPARE: @@ -2050,6 +2085,15 @@ bool MutateTask::execute() return false; } +void MutateTask::updateProfileEvents() const +{ + UInt64 total_elapsed_ms = (*ctx->mutate_entry)->watch.elapsedMilliseconds(); + UInt64 execute_elapsed_ms = ctx->execute_elapsed_ns / 1000000UL; + + ProfileEvents::increment(ProfileEvents::MutationTotalMilliseconds, total_elapsed_ms); + ProfileEvents::increment(ProfileEvents::MutationExecuteMilliseconds, execute_elapsed_ms); +} + static bool canSkipConversionToNullable(const MergeTreeDataPartPtr & part, const MutationCommand & command) { if (command.type != MutationCommand::READ_COLUMN) @@ -2112,6 +2156,7 @@ static bool canSkipMutationCommandForPart(const MergeTreeDataPartPtr & part, con bool MutateTask::prepare() { + ProfileEvents::increment(ProfileEvents::MutationTotalParts); MutationHelpers::checkOperationIsNotCanceled(*ctx->merges_blocker, ctx->mutate_entry); if (ctx->future_part->parts.size() != 1) @@ -2134,7 +2179,7 @@ bool MutateTask::prepare() ctx->commands_for_part.emplace_back(command); if (ctx->source_part->isStoredOnDisk() && !isStorageTouchedByMutations( - *ctx->data, ctx->source_part, ctx->metadata_snapshot, ctx->commands_for_part, context_for_reading)) + ctx->source_part, ctx->metadata_snapshot, ctx->commands_for_part, context_for_reading)) { NameSet files_to_copy_instead_of_hardlinks; auto settings_ptr = ctx->data->getSettings(); @@ -2174,6 +2219,7 @@ bool MutateTask::prepare() ctx->temporary_directory_lock = std::move(lock); } + ProfileEvents::increment(ProfileEvents::MutationUntouchedParts); promise.set_value(std::move(part)); return false; } @@ -2198,6 +2244,8 @@ bool MutateTask::prepare() ctx->stage_progress = std::make_unique(1.0); + bool lightweight_delete_mode = false; + if (!ctx->for_interpreter.empty()) { /// Always disable filtering in mutations: we want to read and write all rows because for updates we rewrite only some of the @@ -2215,6 +2263,21 @@ bool MutateTask::prepare() ctx->mutating_pipeline_builder = ctx->interpreter->execute(); ctx->updated_header = ctx->interpreter->getUpdatedHeader(); ctx->progress_callback = MergeProgressCallback((*ctx->mutate_entry)->ptr(), ctx->watch_prev_elapsed, *ctx->stage_progress); + + lightweight_delete_mode = ctx->updated_header.has(RowExistsColumn::name); + /// If under the condition of lightweight delete mode with rebuild option, add projections again here as we can only know + /// the condition as early as from here. + if (lightweight_delete_mode + && ctx->data->getSettings()->lightweight_mutation_projection_mode == LightweightMutationProjectionMode::REBUILD) + { + for (const auto & projection : ctx->metadata_snapshot->getProjections()) + { + if (!ctx->source_part->hasProjection(projection.name)) + continue; + + ctx->materialized_projections.insert(projection.name); + } + } } auto single_disk_volume = std::make_shared("volume_" + ctx->future_part->name, ctx->space_reservation->getDisk(), 0); @@ -2256,7 +2319,7 @@ bool MutateTask::prepare() if (ctx->mutating_pipeline_builder.initialized()) ctx->execute_ttl_type = MutationHelpers::shouldExecuteTTL(ctx->metadata_snapshot, ctx->interpreter->getColumnDependencies()); - if (ctx->data->getSettings()->exclude_deleted_rows_for_part_size_in_merge && ctx->updated_header.has(RowExistsColumn::name)) + if (ctx->data->getSettings()->exclude_deleted_rows_for_part_size_in_merge && lightweight_delete_mode) { /// This mutation contains lightweight delete and we need to count the deleted rows, /// Reset existing_rows_count of new data part to 0 and it will be updated while writing _row_exists column @@ -2283,6 +2346,7 @@ bool MutateTask::prepare() ctx->new_data_part->remove_tmp_policy = IMergeTreeDataPart::BlobsRemovalPolicyForTemporaryParts::REMOVE_BLOBS; task = std::make_unique(ctx); + ProfileEvents::increment(ProfileEvents::MutationAllPartColumns); } else /// TODO: check that we modify only non-key columns in this case. { @@ -2293,10 +2357,30 @@ bool MutateTask::prepare() ctx->context, ctx->materialized_indices); - ctx->projections_to_recalc = MutationHelpers::getProjectionsToRecalculate( - ctx->source_part, - ctx->metadata_snapshot, - ctx->materialized_projections); + auto lightweight_mutation_projection_mode = ctx->data->getSettings()->lightweight_mutation_projection_mode; + bool lightweight_delete_drops_projections = + lightweight_mutation_projection_mode == LightweightMutationProjectionMode::DROP + || lightweight_mutation_projection_mode == LightweightMutationProjectionMode::THROW; + + std::set projections_to_skip_container; + auto * projections_to_skip = &projections_to_skip_container; + + bool should_create_projections = !(lightweight_delete_mode && lightweight_delete_drops_projections); + /// Under lightweight delete mode, if option is drop, projections_to_recalc should be empty. + if (should_create_projections) + { + ctx->projections_to_recalc = MutationHelpers::getProjectionsToRecalculate( + ctx->source_part, + ctx->metadata_snapshot, + ctx->materialized_projections); + + projections_to_skip = &ctx->projections_to_recalc; + } + else + { + for (const auto & projection : ctx->metadata_snapshot->getProjections()) + projections_to_skip->insert(&projection); + } ctx->stats_to_recalc = MutationHelpers::getStatisticsToRecalculate(ctx->metadata_snapshot, ctx->materialized_statistics); @@ -2306,7 +2390,7 @@ bool MutateTask::prepare() ctx->updated_header, ctx->indices_to_recalc, ctx->mrk_extension, - ctx->projections_to_recalc, + *projections_to_skip, ctx->stats_to_recalc); ctx->files_to_rename = MutationHelpers::collectFilesForRenames( @@ -2322,6 +2406,7 @@ bool MutateTask::prepare() ctx->new_data_part->remove_tmp_policy = IMergeTreeDataPart::BlobsRemovalPolicyForTemporaryParts::ASK_KEEPER; task = std::make_unique(ctx); + ProfileEvents::increment(ProfileEvents::MutationSomePartColumns); } return true; diff --git a/src/Storages/MergeTree/MutateTask.h b/src/Storages/MergeTree/MutateTask.h index dc22b90f0e9..08427bff6d8 100644 --- a/src/Storages/MergeTree/MutateTask.h +++ b/src/Storages/MergeTree/MutateTask.h @@ -39,6 +39,7 @@ public: bool need_prefix_); bool execute(); + void updateProfileEvents() const; std::future getFuture() { diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp index f46b4de10b7..ee47fe3549a 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp @@ -1004,6 +1004,10 @@ void ParallelReplicasReadingCoordinator::handleInitialAllRangesAnnouncement(Init ParallelReadResponse ParallelReplicasReadingCoordinator::handleRequest(ParallelReadRequest request) { + if (request.min_number_of_marks == 0) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "Chosen number of marks to read is zero (likely because of weird interference of settings)"); + ProfileEventTimeIncrement watch(ProfileEvents::ParallelReplicasHandleRequestMicroseconds); std::lock_guard lock(mutex); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp index 05fd6f6915b..d3ccda904b6 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include #include @@ -49,6 +48,20 @@ ReplicatedMergeTreeRestartingThread::ReplicatedMergeTreeRestartingThread(Storage task = storage.getContext()->getSchedulePool().createTask(log_name, [this]{ run(); }); } +void ReplicatedMergeTreeRestartingThread::start(bool schedule) +{ + LOG_TRACE(log, "Starting the restating thread, schedule: {}", schedule); + if (schedule) + task->activateAndSchedule(); + else + task->activate(); +} + +void ReplicatedMergeTreeRestartingThread::wakeup() +{ + task->schedule(); +} + void ReplicatedMergeTreeRestartingThread::run() { if (need_stop) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h index 01071d80e8b..d719505ae5e 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h @@ -24,16 +24,9 @@ class ReplicatedMergeTreeRestartingThread public: explicit ReplicatedMergeTreeRestartingThread(StorageReplicatedMergeTree & storage_); - void start(bool schedule = true) - { - LOG_TRACE(log, "Starting restating thread, schedule: {}", schedule); - if (schedule) - task->activateAndSchedule(); - else - task->activate(); - } + void start(bool schedule); - void wakeup() { task->schedule(); } + void wakeup(); void shutdown(bool part_of_full_shutdown); diff --git a/src/Storages/MergeTree/VectorSimilarityCondition.cpp b/src/Storages/MergeTree/VectorSimilarityCondition.cpp new file mode 100644 index 00000000000..c8f33857640 --- /dev/null +++ b/src/Storages/MergeTree/VectorSimilarityCondition.cpp @@ -0,0 +1,350 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int INCORRECT_QUERY; +} + +namespace +{ + +template +void extractReferenceVectorFromLiteral(std::vector & reference_vector, Literal literal) +{ + Float64 float_element_of_reference_vector; + Int64 int_element_of_reference_vector; + + for (const auto & value : literal.value()) + { + if (value.tryGet(float_element_of_reference_vector)) + reference_vector.emplace_back(float_element_of_reference_vector); + else if (value.tryGet(int_element_of_reference_vector)) + reference_vector.emplace_back(static_cast(int_element_of_reference_vector)); + else + throw Exception(ErrorCodes::INCORRECT_QUERY, "Wrong type of elements in reference vector. Only float or int are supported."); + } +} + +VectorSimilarityCondition::Info::DistanceFunction stringToDistanceFunction(std::string_view distance_function) +{ + if (distance_function == "L2Distance") + return VectorSimilarityCondition::Info::DistanceFunction::L2; + else + return VectorSimilarityCondition::Info::DistanceFunction::Unknown; +} + +} + +VectorSimilarityCondition::VectorSimilarityCondition(const SelectQueryInfo & query_info, ContextPtr context) + : block_with_constants(KeyCondition::getBlockWithConstants(query_info.query, query_info.syntax_analyzer_result, context)) + , index_granularity(context->getMergeTreeSettings().index_granularity) + , max_limit_for_ann_queries(context->getSettingsRef().max_limit_for_ann_queries) + , index_is_useful(checkQueryStructure(query_info)) +{} + +bool VectorSimilarityCondition::alwaysUnknownOrTrue(String distance_function) const +{ + if (!index_is_useful) + return true; /// query isn't supported + /// If query is supported, check if distance function of index is the same as distance function in query + return !(stringToDistanceFunction(distance_function) == query_information->distance_function); +} + +UInt64 VectorSimilarityCondition::getLimit() const +{ + if (index_is_useful && query_information.has_value()) + return query_information->limit; + throw Exception(ErrorCodes::LOGICAL_ERROR, "No LIMIT section in query, not supported"); +} + +std::vector VectorSimilarityCondition::getReferenceVector() const +{ + if (index_is_useful && query_information.has_value()) + return query_information->reference_vector; + throw Exception(ErrorCodes::LOGICAL_ERROR, "Reference vector was requested for useless or uninitialized index."); +} + +size_t VectorSimilarityCondition::getDimensions() const +{ + if (index_is_useful && query_information.has_value()) + return query_information->reference_vector.size(); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Number of dimensions was requested for useless or uninitialized index."); +} + +String VectorSimilarityCondition::getColumnName() const +{ + if (index_is_useful && query_information.has_value()) + return query_information->column_name; + throw Exception(ErrorCodes::LOGICAL_ERROR, "Column name was requested for useless or uninitialized index."); +} + +VectorSimilarityCondition::Info::DistanceFunction VectorSimilarityCondition::getDistanceFunction() const +{ + if (index_is_useful && query_information.has_value()) + return query_information->distance_function; + throw Exception(ErrorCodes::LOGICAL_ERROR, "Distance function was requested for useless or uninitialized index."); +} + +bool VectorSimilarityCondition::checkQueryStructure(const SelectQueryInfo & query) +{ + Info order_by_info; + + /// Build rpns for query sections + const auto & select = query.query->as(); + + RPN rpn_order_by; + RPNElement rpn_limit; + UInt64 limit; + + if (select.limitLength()) + traverseAtomAST(select.limitLength(), rpn_limit); + + if (select.orderBy()) + traverseOrderByAST(select.orderBy(), rpn_order_by); + + /// Reverse RPNs for conveniences during parsing + std::reverse(rpn_order_by.begin(), rpn_order_by.end()); + + const bool order_by_is_valid = matchRPNOrderBy(rpn_order_by, order_by_info); + const bool limit_is_valid = matchRPNLimit(rpn_limit, limit); + + if (!limit_is_valid || limit > max_limit_for_ann_queries) + return false; + + if (order_by_is_valid) + { + query_information = std::move(order_by_info); + query_information->limit = limit; + return true; + } + + return false; +} + +void VectorSimilarityCondition::traverseAST(const ASTPtr & node, RPN & rpn) +{ + /// If the node is ASTFunction, it may have children nodes + if (const auto * func = node->as()) + { + const ASTs & children = func->arguments->children; + /// Traverse children nodes + for (const auto& child : children) + traverseAST(child, rpn); + } + + RPNElement element; + /// Get the data behind node + if (!traverseAtomAST(node, element)) + element.function = RPNElement::FUNCTION_UNKNOWN; + + rpn.emplace_back(std::move(element)); +} + +bool VectorSimilarityCondition::traverseAtomAST(const ASTPtr & node, RPNElement & out) +{ + /// Match Functions + if (const auto * function = node->as()) + { + /// Set the name + out.func_name = function->name; + + if (function->name == "L1Distance" || + function->name == "L2Distance" || + function->name == "LinfDistance" || + function->name == "cosineDistance" || + function->name == "dotProduct") + out.function = RPNElement::FUNCTION_DISTANCE; + else if (function->name == "array") + out.function = RPNElement::FUNCTION_ARRAY; + else if (function->name == "_CAST") + out.function = RPNElement::FUNCTION_CAST; + else + return false; + + return true; + } + /// Match identifier + else if (const auto * identifier = node->as()) + { + out.function = RPNElement::FUNCTION_IDENTIFIER; + out.identifier.emplace(identifier->name()); + out.func_name = "column identifier"; + + return true; + } + + /// Check if we have constants behind the node + return tryCastToConstType(node, out); +} + +bool VectorSimilarityCondition::tryCastToConstType(const ASTPtr & node, RPNElement & out) +{ + Field const_value; + DataTypePtr const_type; + + if (KeyCondition::getConstant(node, block_with_constants, const_value, const_type)) + { + /// Check for constant types + if (const_value.getType() == Field::Types::Float64) + { + out.function = RPNElement::FUNCTION_FLOAT_LITERAL; + out.float_literal.emplace(const_value.safeGet()); + out.func_name = "Float literal"; + return true; + } + + if (const_value.getType() == Field::Types::UInt64) + { + out.function = RPNElement::FUNCTION_INT_LITERAL; + out.int_literal.emplace(const_value.safeGet()); + out.func_name = "Int literal"; + return true; + } + + if (const_value.getType() == Field::Types::Int64) + { + out.function = RPNElement::FUNCTION_INT_LITERAL; + out.int_literal.emplace(const_value.safeGet()); + out.func_name = "Int literal"; + return true; + } + + if (const_value.getType() == Field::Types::Array) + { + out.function = RPNElement::FUNCTION_LITERAL_ARRAY; + out.array_literal = const_value.safeGet(); + out.func_name = "Array literal"; + return true; + } + + if (const_value.getType() == Field::Types::String) + { + out.function = RPNElement::FUNCTION_STRING_LITERAL; + out.func_name = const_value.safeGet(); + return true; + } + } + + return false; +} + +void VectorSimilarityCondition::traverseOrderByAST(const ASTPtr & node, RPN & rpn) +{ + if (const auto * expr_list = node->as()) + if (const auto * order_by_element = expr_list->children.front()->as()) + traverseAST(order_by_element->children.front(), rpn); +} + +/// Returns true and stores ANNExpr if the query has valid ORDERBY clause +bool VectorSimilarityCondition::matchRPNOrderBy(RPN & rpn, Info & info) +{ + /// ORDER BY clause must have at least 3 expressions + if (rpn.size() < 3) + return false; + + auto iter = rpn.begin(); + auto end = rpn.end(); + + bool identifier_found = false; + + /// Matches DistanceFunc->[Column]->[ArrayFunc]->ReferenceVector(floats)->[Column] + if (iter->function != RPNElement::FUNCTION_DISTANCE) + return false; + + info.distance_function = stringToDistanceFunction(iter->func_name); + ++iter; + + if (iter->function == RPNElement::FUNCTION_IDENTIFIER) + { + identifier_found = true; + info.column_name = std::move(iter->identifier.value()); + ++iter; + } + + if (iter->function == RPNElement::FUNCTION_ARRAY) + ++iter; + + if (iter->function == RPNElement::FUNCTION_LITERAL_ARRAY) + { + extractReferenceVectorFromLiteral(info.reference_vector, iter->array_literal); + ++iter; + } + + /// further conditions are possible if there is no array, or no identifier is found + /// the array can be inside a cast function. For other cases, see the loop after this condition + if (iter != end && iter->function == RPNElement::FUNCTION_CAST) + { + ++iter; + /// Cast should be made to array + if (!iter->func_name.starts_with("Array")) + return false; + ++iter; + if (iter->function == RPNElement::FUNCTION_LITERAL_ARRAY) + { + extractReferenceVectorFromLiteral(info.reference_vector, iter->array_literal); + ++iter; + } + else + return false; + } + + while (iter != end) + { + if (iter->function == RPNElement::FUNCTION_FLOAT_LITERAL || + iter->function == RPNElement::FUNCTION_INT_LITERAL) + info.reference_vector.emplace_back(getFloatOrIntLiteralOrPanic(iter)); + else if (iter->function == RPNElement::FUNCTION_IDENTIFIER) + { + if (identifier_found) + return false; + info.column_name = std::move(iter->identifier.value()); + identifier_found = true; + } + else + return false; + + ++iter; + } + + /// Final checks of correctness + return identifier_found && !info.reference_vector.empty(); +} + +/// Returns true and stores Length if we have valid LIMIT clause in query +bool VectorSimilarityCondition::matchRPNLimit(RPNElement & rpn, UInt64 & limit) +{ + if (rpn.function == RPNElement::FUNCTION_INT_LITERAL) + { + limit = rpn.int_literal.value(); + return true; + } + + return false; +} + +/// Gets float or int from AST node +float VectorSimilarityCondition::getFloatOrIntLiteralOrPanic(const RPN::iterator& iter) +{ + if (iter->float_literal.has_value()) + return iter->float_literal.value(); + if (iter->int_literal.has_value()) + return static_cast(iter->int_literal.value()); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Wrong parsed AST in buildRPN\n"); +} + +} diff --git a/src/Storages/MergeTree/ApproximateNearestNeighborIndexesCommon.h b/src/Storages/MergeTree/VectorSimilarityCondition.h similarity index 53% rename from src/Storages/MergeTree/ApproximateNearestNeighborIndexesCommon.h rename to src/Storages/MergeTree/VectorSimilarityCondition.h index 5da2a714b02..2380f8f46b0 100644 --- a/src/Storages/MergeTree/ApproximateNearestNeighborIndexesCommon.h +++ b/src/Storages/MergeTree/VectorSimilarityCondition.h @@ -9,52 +9,9 @@ namespace DB { -static constexpr auto DISTANCE_FUNCTION_L2 = "L2Distance"; -static constexpr auto DISTANCE_FUNCTION_COSINE = "cosineDistance"; - -/// Approximate Nearest Neighbour queries have a similar structure: -/// - reference vector from which all distances are calculated -/// - metric name (e.g L2Distance, LpDistance, etc.) -/// - name of column with embeddings -/// - type of query -/// - maximum number of returned elements (LIMIT) -/// -/// And two optional parameters: -/// - p for LpDistance function -/// - distance to compare with (only for where queries) -/// -/// This struct holds all these components. -struct ApproximateNearestNeighborInformation -{ - using Embedding = std::vector; - Embedding reference_vector; - - enum class Metric : uint8_t - { - Unknown, - L2, - Lp - }; - Metric metric; - - String column_name; - UInt64 limit; - - enum class Type : uint8_t - { - OrderBy, - Where - }; - Type type; - - float p_for_lp_dist = -1.0; - float distance = -1.0; -}; - - -// Class ANNCondition, is responsible for recognizing if the query is an ANN queries which can utilize ANN indexes. It parses the SQL query -/// and checks if it matches ANNIndexes. Method alwaysUnknownOrTrue returns false if we can speed up the query, and true otherwise. It has -/// only one argument, the name of the metric with which index was built. Two main patterns of queries are supported +/// Class VectorSimilarityCondition is responsible for recognizing if the query can utilize vector similarity indexes. +/// Method alwaysUnknownOrTrue returns false if we can speed up the query, and true otherwise. It has +/// only one argument, the name of the distance function with which index was built. Two main patterns of queries are supported /// /// - 1. WHERE queries: /// SELECT * FROM * WHERE DistanceFunc(column, reference_vector) < floatLiteral LIMIT count @@ -64,14 +21,14 @@ struct ApproximateNearestNeighborInformation /// /// Queries without LIMIT count are not supported /// If the query is both of type 1. and 2., than we can't use the index and alwaysUnknownOrTrue returns true. -/// reference_vector should have float coordinates, e.g. (0.2, 0.1, .., 0.5) +/// reference_vector should have float coordinates, e.g. [0.2, 0.1, .., 0.5] /// -/// If the query matches one of these two types, then this class extracts the main information needed for ANN indexes from the query. +/// If the query matches one of these two types, then this class extracts the main information needed for vector similarity indexes from the +/// query. /// /// From matching query it extracts /// - referenceVector -/// - metricName(DistanceFunction) -/// - dimension size if query uses LpDistance +/// - distance function /// - distance to compare(ONLY for search types, otherwise you get exception) /// - spaceDimension(which is referenceVector's components count) /// - column @@ -79,35 +36,45 @@ struct ApproximateNearestNeighborInformation /// - queryHasOrderByClause and queryHasWhereClause return true if query matches the type /// /// Search query type is also recognized for PREWHERE clause -class ApproximateNearestNeighborCondition +class VectorSimilarityCondition { public: - ApproximateNearestNeighborCondition(const SelectQueryInfo & query_info, ContextPtr context); + VectorSimilarityCondition(const SelectQueryInfo & query_info, ContextPtr context); + + /// Approximate nearest neighbour (ANN) / vector similarity queries have a similar structure: + /// - reference vector from which all distances are calculated + /// - distance function, e.g L2Distance + /// - name of column with embeddings + /// - type of query + /// - maximum number of returned elements (LIMIT) + /// + /// And one optional parameter: + /// - distance to compare with (only for where queries) + /// + /// This struct holds all these components. + struct Info + { + enum class DistanceFunction : uint8_t + { + Unknown, + L2 + }; + + std::vector reference_vector; + DistanceFunction distance_function; + String column_name; + UInt64 limit; + float distance = -1.0; + }; /// Returns false if query can be speeded up by an ANN index, true otherwise. - bool alwaysUnknownOrTrue(String metric) const; + bool alwaysUnknownOrTrue(String distance_function) const; - /// Returns the distance to compare with for search query - float getComparisonDistanceForWhereQuery() const; - - /// Distance should be calculated regarding to referenceVector - std::vector getReferenceVector() const; - - /// Reference vector's dimension count + std::vector getReferenceVector() const; size_t getDimensions() const; - String getColumnName() const; - - ApproximateNearestNeighborInformation::Metric getMetricType() const; - - /// The P- value if the metric is 'LpDistance' - float getPValueForLpDistance() const; - - ApproximateNearestNeighborInformation::Type getQueryType() const; - + Info::DistanceFunction getDistanceFunction() const; UInt64 getIndexGranularity() const { return index_granularity; } - - /// Length's value from LIMIT clause UInt64 getLimit() const; private: @@ -118,9 +85,6 @@ private: /// DistanceFunctions FUNCTION_DISTANCE, - //tuple(0.1, ..., 0.1) - FUNCTION_TUPLE, - //array(0.1, ..., 0.1) FUNCTION_ARRAY, @@ -139,9 +103,6 @@ private: /// Unknown, can be any value FUNCTION_UNKNOWN, - /// (0.1, ...., 0.1) vector without word 'tuple' - FUNCTION_LITERAL_TUPLE, - /// [0.1, ...., 0.1] vector without word 'array' FUNCTION_LITERAL_ARRAY, @@ -154,19 +115,14 @@ private: explicit RPNElement(Function function_ = FUNCTION_UNKNOWN) : function(function_) - , func_name("Unknown") - , float_literal(std::nullopt) - , identifier(std::nullopt) {} Function function; - String func_name; + String func_name = "Unknown"; std::optional float_literal; std::optional identifier; std::optional int_literal; - - std::optional tuple_literal; std::optional array_literal; UInt32 dim = 0; @@ -186,16 +142,16 @@ private: void traverseOrderByAST(const ASTPtr & node, RPN & rpn); /// Returns true and stores ANNExpr if the query has valid WHERE section - static bool matchRPNWhere(RPN & rpn, ApproximateNearestNeighborInformation & ann_info); + static bool matchRPNWhere(RPN & rpn, Info & info); /// Returns true and stores ANNExpr if the query has valid ORDERBY section - static bool matchRPNOrderBy(RPN & rpn, ApproximateNearestNeighborInformation & ann_info); + static bool matchRPNOrderBy(RPN & rpn, Info & info); /// Returns true and stores Length if we have valid LIMIT clause in query static bool matchRPNLimit(RPNElement & rpn, UInt64 & limit); - /* Matches dist function, reference vector, column name */ - static bool matchMainParts(RPN::iterator & iter, const RPN::iterator & end, ApproximateNearestNeighborInformation & ann_info); + /// Matches dist function, reference vector, column name + static bool matchMainParts(RPN::iterator & iter, const RPN::iterator & end, Info & info); /// Gets float or int from AST node static float getFloatOrIntLiteralOrPanic(const RPN::iterator& iter); @@ -203,7 +159,7 @@ private: Block block_with_constants; /// true if we have one of two supported query types - std::optional query_information; + std::optional query_information; // Get from settings ANNIndex parameters const UInt64 index_granularity; @@ -214,13 +170,4 @@ private: bool index_is_useful = false; }; - -/// Common interface of ANN indexes. -class IMergeTreeIndexConditionApproximateNearestNeighbor : public IMergeTreeIndexCondition -{ -public: - /// Returns vector of indexes of ranges in granule which are useful for query. - virtual std::vector getUsefulRanges(MergeTreeIndexGranulePtr idx_granule) const = 0; -}; - } diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp index fb86d9e7603..3a22daa0011 100644 --- a/src/Storages/MergeTree/checkDataPart.cpp +++ b/src/Storages/MergeTree/checkDataPart.cpp @@ -215,6 +215,10 @@ static IMergeTreeDataPart::Checksums checkDataPart( { get_serialization(column)->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) { + /// Skip ephemeral subcolumns that don't store any real data. + if (ISerialization::isEphemeralSubcolumn(substream_path, substream_path.size())) + return; + auto stream_name = IMergeTreeDataPart::getStreamNameForColumn(column, substream_path, ".bin", data_part_storage); if (!stream_name) diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index b7887c35590..9a65d590453 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -538,6 +538,9 @@ static StoragePtr create(const StorageFactory::Arguments & args) if (replica_name.empty()) throw Exception(ErrorCodes::NO_REPLICA_NAME_GIVEN, "No replica name in config{}", verbose_help_message); + // '\t' and '\n' will interrupt parsing 'source replica' in ReplicatedMergeTreeLogEntryData::readText + if (replica_name.find('\t') != String::npos || replica_name.find('\n') != String::npos) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Replica name must not contain '\\t' or '\\n'"); arg_cnt = engine_args.size(); /// Update `arg_cnt` here because extractZooKeeperPathAndReplicaNameFromEngineArgs() could add arguments. arg_num = 2; /// zookeeper_path and replica_name together are always two arguments. @@ -592,7 +595,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) if (ast->value.getType() != Field::Types::String) throw Exception(ErrorCodes::BAD_ARGUMENTS, format_str, error_msg); - graphite_config_name = ast->value.get(); + graphite_config_name = ast->value.safeGet(); } else throw Exception(ErrorCodes::BAD_ARGUMENTS, format_str, error_msg); diff --git a/src/Storages/MessageQueueSink.cpp b/src/Storages/MessageQueueSink.cpp index 9136300ab3b..8190a375b97 100644 --- a/src/Storages/MessageQueueSink.cpp +++ b/src/Storages/MessageQueueSink.cpp @@ -47,6 +47,8 @@ void MessageQueueSink::consume(Chunk & chunk) if (columns.empty()) return; + /// The formatter might hold pointers to buffer (e.g. if PeekableWriteBuffer is used), which means the formatter + /// needs to be reset after buffer might reallocate its memory. In this exact case after restarting the buffer. if (row_format) { size_t row = 0; @@ -61,12 +63,12 @@ void MessageQueueSink::consume(Chunk & chunk) row_format->writeRow(columns, row); } row_format->finalize(); - row_format->resetFormatter(); producer->produce(buffer->str(), i, columns, row - 1); /// Reallocate buffer if it's capacity is large then DBMS_DEFAULT_BUFFER_SIZE, /// because most likely in this case we serialized abnormally large row /// and won't need this large allocated buffer anymore. buffer->restart(DBMS_DEFAULT_BUFFER_SIZE); + row_format->resetFormatter(); } } else @@ -74,12 +76,11 @@ void MessageQueueSink::consume(Chunk & chunk) format->write(getHeader().cloneWithColumns(chunk.detachColumns())); format->finalize(); producer->produce(buffer->str(), chunk.getNumRows(), columns, chunk.getNumRows() - 1); - format->resetFormatter(); buffer->restart(); + format->resetFormatter(); } } - void MessageQueueSink::onCancel() noexcept { try diff --git a/src/Storages/NamedCollectionsHelpers.h b/src/Storages/NamedCollectionsHelpers.h index f444a581eb6..bf2da7235a2 100644 --- a/src/Storages/NamedCollectionsHelpers.h +++ b/src/Storages/NamedCollectionsHelpers.h @@ -133,7 +133,7 @@ void validateNamedCollection( { throw Exception( ErrorCodes::BAD_ARGUMENTS, - "Unexpected key {} in named collection. Required keys: {}, optional keys: {}", + "Unexpected key `{}` in named collection. Required keys: {}, optional keys: {}", backQuoteIfNeed(key), fmt::join(required_keys, ", "), fmt::join(optional_keys, ", ")); } } diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp index c896a760597..7aadba18817 100644 --- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp @@ -262,10 +262,11 @@ struct DeltaLakeMetadataImpl partition_name, file_schema.toNamesAndTypesDescription()); } + LOG_TEST(log, "Partition {} value is {} (data type: {}, file: {})", + partition_name, value, name_and_type->type->getName(), filename); + auto field = getFieldValue(value, name_and_type->type); current_partition_columns.emplace_back(*name_and_type, field); - - LOG_TEST(log, "Partition {} value is {} (for {})", partition_name, value, filename); } } } @@ -332,6 +333,8 @@ struct DeltaLakeMetadataImpl WhichDataType which(check_type->getTypeId()); if (which.isStringOrFixedString()) return value; + else if (isBool(check_type)) + return parse(value); else if (which.isInt8()) return parse(value); else if (which.isUInt8()) diff --git a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h index c8603fccb86..087207d3860 100644 --- a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h +++ b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h @@ -91,8 +91,9 @@ public: { ConfigurationPtr configuration = base_configuration->clone(); configuration->setPaths(metadata->getDataFiles()); + std::string sample_path; return Storage::resolveSchemaFromData( - object_storage_, configuration, format_settings_, local_context); + object_storage_, configuration, format_settings_, sample_path, local_context); } } diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.cpp b/src/Storages/ObjectStorage/ReadBufferIterator.cpp index df78f128c80..fadf683fce7 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.cpp +++ b/src/Storages/ObjectStorage/ReadBufferIterator.cpp @@ -132,7 +132,7 @@ void ReadBufferIterator::setFormatName(const String & format_name) format = format_name; } -String ReadBufferIterator::getLastFileName() const +String ReadBufferIterator::getLastFilePath() const { if (current_object_info) return current_object_info->getPath(); diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.h b/src/Storages/ObjectStorage/ReadBufferIterator.h index 6eeb52ec2ed..b81aebb7b07 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.h +++ b/src/Storages/ObjectStorage/ReadBufferIterator.h @@ -33,7 +33,7 @@ public: void setResultingSchema(const ColumnsDescription & columns) override; - String getLastFileName() const override; + String getLastFilePath() const override; void setFormatName(const String & format_name) override; diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index fe675a17d54..a0f189e92fc 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -33,6 +34,33 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +String StorageObjectStorage::getPathSample(StorageInMemoryMetadata metadata, ContextPtr context) +{ + auto query_settings = configuration->getQuerySettings(context); + /// We don't want to throw an exception if there are no files with specified path. + query_settings.throw_on_zero_files_match = false; + + bool local_distributed_processing = distributed_processing; + if (context->getSettingsRef().use_hive_partitioning) + local_distributed_processing = false; + + auto file_iterator = StorageObjectStorageSource::createFileIterator( + configuration, + query_settings, + object_storage, + local_distributed_processing, + context, + {}, // predicate + metadata.getColumns().getAll(), // virtual_columns + nullptr, // read_keys + {} // file_progress_callback + ); + + if (auto file = file_iterator->next(0)) + return file->getPath(); + return ""; +} + StorageObjectStorage::StorageObjectStorage( ConfigurationPtr configuration_, ObjectStoragePtr object_storage_, @@ -53,7 +81,9 @@ StorageObjectStorage::StorageObjectStorage( , log(getLogger(fmt::format("Storage{}({})", configuration->getEngineName(), table_id_.getFullTableName()))) { ColumnsDescription columns{columns_}; - resolveSchemaAndFormat(columns, configuration->format, object_storage, configuration, format_settings, context); + + std::string sample_path; + resolveSchemaAndFormat(columns, configuration->format, object_storage, configuration, format_settings, sample_path, context); configuration->check(context); StorageInMemoryMetadata metadata; @@ -61,7 +91,10 @@ StorageObjectStorage::StorageObjectStorage( metadata.setConstraints(constraints_); metadata.setComment(comment); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns())); + if (sample_path.empty() && context->getSettingsRef().use_hive_partitioning) + sample_path = getPathSample(metadata, context); + + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.columns, context, sample_path, format_settings)); setInMemoryMetadata(metadata); } @@ -198,7 +231,7 @@ private: return; auto context = getContext(); iterator_wrapper = StorageObjectStorageSource::createFileIterator( - configuration, object_storage, distributed_processing, + configuration, configuration->getQuerySettings(context), object_storage, distributed_processing, context, predicate, virtual_columns, nullptr, context->getFileProgressCallback()); } }; @@ -350,6 +383,7 @@ std::unique_ptr StorageObjectStorage::createReadBufferIterat { auto file_iterator = StorageObjectStorageSource::createFileIterator( configuration, + configuration->getQuerySettings(context), object_storage, false/* distributed_processing */, context, @@ -366,33 +400,41 @@ ColumnsDescription StorageObjectStorage::resolveSchemaFromData( const ObjectStoragePtr & object_storage, const ConfigurationPtr & configuration, const std::optional & format_settings, + std::string & sample_path, const ContextPtr & context) { ObjectInfos read_keys; auto iterator = createReadBufferIterator(object_storage, configuration, format_settings, read_keys, context); - return readSchemaFromFormat(configuration->format, format_settings, *iterator, context); + auto schema = readSchemaFromFormat(configuration->format, format_settings, *iterator, context); + sample_path = iterator->getLastFilePath(); + return schema; } std::string StorageObjectStorage::resolveFormatFromData( const ObjectStoragePtr & object_storage, const ConfigurationPtr & configuration, const std::optional & format_settings, + std::string & sample_path, const ContextPtr & context) { ObjectInfos read_keys; auto iterator = createReadBufferIterator(object_storage, configuration, format_settings, read_keys, context); - return detectFormatAndReadSchema(format_settings, *iterator, context).second; + auto format_and_schema = detectFormatAndReadSchema(format_settings, *iterator, context).second; + sample_path = iterator->getLastFilePath(); + return format_and_schema; } std::pair StorageObjectStorage::resolveSchemaAndFormatFromData( const ObjectStoragePtr & object_storage, const ConfigurationPtr & configuration, const std::optional & format_settings, + std::string & sample_path, const ContextPtr & context) { ObjectInfos read_keys; auto iterator = createReadBufferIterator(object_storage, configuration, format_settings, read_keys, context); auto [columns, format] = detectFormatAndReadSchema(format_settings, *iterator, context); + sample_path = iterator->getLastFilePath(); configuration->format = format; return std::pair(columns, format); } diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h index 818ce055c77..cae0db48f31 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.h +++ b/src/Storages/ObjectStorage/StorageObjectStorage.h @@ -102,23 +102,28 @@ public: const ObjectStoragePtr & object_storage, const ConfigurationPtr & configuration, const std::optional & format_settings, + std::string & sample_path, const ContextPtr & context); static std::string resolveFormatFromData( const ObjectStoragePtr & object_storage, const ConfigurationPtr & configuration, const std::optional & format_settings, + std::string & sample_path, const ContextPtr & context); static std::pair resolveSchemaAndFormatFromData( const ObjectStoragePtr & object_storage, const ConfigurationPtr & configuration, const std::optional & format_settings, + std::string & sample_path, const ContextPtr & context); protected: virtual void updateConfiguration(ContextPtr local_context); + String getPathSample(StorageInMemoryMetadata metadata, ContextPtr context); + virtual ReadFromFormatInfo prepareReadingFromFormat( const Strings & requested_columns, const StorageSnapshotPtr & storage_snapshot, diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp index 78f568d8ae2..08a0739d929 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp @@ -1,6 +1,8 @@ #include "Storages/ObjectStorage/StorageObjectStorageCluster.h" #include +#include +#include #include #include #include @@ -19,6 +21,28 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +String StorageObjectStorageCluster::getPathSample(StorageInMemoryMetadata metadata, ContextPtr context) +{ + auto query_settings = configuration->getQuerySettings(context); + /// We don't want to throw an exception if there are no files with specified path. + query_settings.throw_on_zero_files_match = false; + auto file_iterator = StorageObjectStorageSource::createFileIterator( + configuration, + query_settings, + object_storage, + false, // distributed_processing + context, + {}, // predicate + metadata.getColumns().getAll(), // virtual_columns + nullptr, // read_keys + {} // file_progress_callback + ); + + if (auto file = file_iterator->next(0)) + return file->getPath(); + return ""; +} + StorageObjectStorageCluster::StorageObjectStorageCluster( const String & cluster_name_, ConfigurationPtr configuration_, @@ -33,14 +57,18 @@ StorageObjectStorageCluster::StorageObjectStorageCluster( , object_storage(object_storage_) { ColumnsDescription columns{columns_}; - resolveSchemaAndFormat(columns, configuration->format, object_storage, configuration, {}, context_); + std::string sample_path; + resolveSchemaAndFormat(columns, configuration->format, object_storage, configuration, {}, sample_path, context_); configuration->check(context_); StorageInMemoryMetadata metadata; metadata.setColumns(columns); metadata.setConstraints(constraints_); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns())); + if (sample_path.empty() && context_->getSettingsRef().use_hive_partitioning) + sample_path = getPathSample(metadata, context_); + + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.columns, context_, sample_path)); setInMemoryMetadata(metadata); } @@ -83,8 +111,8 @@ RemoteQueryExecutor::Extension StorageObjectStorageCluster::getTaskIteratorExten const ActionsDAG::Node * predicate, const ContextPtr & local_context) const { auto iterator = StorageObjectStorageSource::createFileIterator( - configuration, object_storage, /* distributed_processing */false, local_context, - predicate, virtual_columns, nullptr, local_context->getFileProgressCallback()); + configuration, configuration->getQuerySettings(local_context), object_storage, /* distributed_processing */false, + local_context, predicate, virtual_columns, nullptr, local_context->getFileProgressCallback()); auto callback = std::make_shared>([iterator]() mutable -> String { diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h index 108aa109616..0088ff28fc2 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h @@ -27,6 +27,8 @@ public: RemoteQueryExecutor::Extension getTaskIteratorExtension( const ActionsDAG::Node * predicate, const ContextPtr & context) const override; + String getPathSample(StorageInMemoryMetadata metadata, ContextPtr context); + private: void updateQueryToSendIfNeeded( ASTPtr & query, diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 320799c7166..04e319cd0b8 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -99,6 +99,7 @@ std::string StorageObjectStorageSource::getUniqueStoragePathIdentifier( std::shared_ptr StorageObjectStorageSource::createFileIterator( ConfigurationPtr configuration, + const StorageObjectStorage::QuerySettings & query_settings, ObjectStoragePtr object_storage, bool distributed_processing, const ContextPtr & local_context, @@ -116,7 +117,6 @@ std::shared_ptr StorageObjectStorageSourc throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expression can not have wildcards inside {} name", configuration->getNamespaceType()); - auto settings = configuration->getQuerySettings(local_context); const bool is_archive = configuration->isArchive(); std::unique_ptr iterator; @@ -125,8 +125,8 @@ std::shared_ptr StorageObjectStorageSourc /// Iterate through disclosed globs and make a source for each file iterator = std::make_unique( object_storage, configuration, predicate, virtual_columns, - local_context, is_archive ? nullptr : read_keys, settings.list_object_keys_size, - settings.throw_on_zero_files_match, file_progress_callback); + local_context, is_archive ? nullptr : read_keys, query_settings.list_object_keys_size, + query_settings.throw_on_zero_files_match, file_progress_callback); } else { @@ -148,7 +148,7 @@ std::shared_ptr StorageObjectStorageSourc iterator = std::make_unique( object_storage, copy_configuration, virtual_columns, is_archive ? nullptr : read_keys, - settings.ignore_non_existent_file, file_progress_callback); + query_settings.ignore_non_existent_file, file_progress_callback); } if (is_archive) @@ -198,15 +198,17 @@ Chunk StorageObjectStorageSource::generate() const auto & object_info = reader.getObjectInfo(); const auto & filename = object_info->getFileName(); chassert(object_info->metadata); + VirtualColumnUtils::addRequestedFileLikeStorageVirtualsToChunk( chunk, read_from_format_info.requested_virtual_columns, - {.path = getUniqueStoragePathIdentifier(*configuration, *object_info, false), - .size = object_info->isArchive() ? object_info->fileSizeInArchive() : object_info->metadata->size_bytes, - .filename = &filename, - .last_modified = object_info->metadata->last_modified, - .etag = &(object_info->metadata->etag) - }); + { + .path = getUniqueStoragePathIdentifier(*configuration, *object_info, false), + .size = object_info->isArchive() ? object_info->fileSizeInArchive() : object_info->metadata->size_bytes, + .filename = &filename, + .last_modified = object_info->metadata->last_modified, + .etag = &(object_info->metadata->etag) + }, getContext()); const auto & partition_columns = configuration->getPartitionColumns(); if (!partition_columns.empty() && chunk_size && chunk.hasColumns()) @@ -278,7 +280,7 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade const std::shared_ptr & file_iterator, const ConfigurationPtr & configuration, const ObjectStoragePtr & object_storage, - const ReadFromFormatInfo & read_from_format_info, + ReadFromFormatInfo & read_from_format_info, const std::optional & format_settings, const std::shared_ptr & key_condition_, const ContextPtr & context_, diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index e466621e1e1..7ae7a2358e9 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -52,6 +52,7 @@ public: static std::shared_ptr createFileIterator( ConfigurationPtr configuration, + const StorageObjectStorage::QuerySettings & query_settings, ObjectStoragePtr object_storage, bool distributed_processing, const ContextPtr & local_context, @@ -73,7 +74,7 @@ protected: const UInt64 max_block_size; const bool need_only_count; const size_t max_parsing_threads; - const ReadFromFormatInfo read_from_format_info; + ReadFromFormatInfo read_from_format_info; const std::shared_ptr create_reader_pool; std::shared_ptr file_iterator; @@ -121,7 +122,7 @@ protected: const std::shared_ptr & file_iterator, const ConfigurationPtr & configuration, const ObjectStoragePtr & object_storage, - const ReadFromFormatInfo & read_from_format_info, + ReadFromFormatInfo & read_from_format_info, const std::optional & format_settings, const std::shared_ptr & key_condition_, const ContextPtr & context_, diff --git a/src/Storages/ObjectStorage/Utils.cpp b/src/Storages/ObjectStorage/Utils.cpp index e49e14d2a0c..73410d959e0 100644 --- a/src/Storages/ObjectStorage/Utils.cpp +++ b/src/Storages/ObjectStorage/Utils.cpp @@ -49,19 +49,20 @@ void resolveSchemaAndFormat( ObjectStoragePtr object_storage, const StorageObjectStorage::ConfigurationPtr & configuration, std::optional format_settings, + std::string & sample_path, const ContextPtr & context) { if (columns.empty()) { if (format == "auto") std::tie(columns, format) = - StorageObjectStorage::resolveSchemaAndFormatFromData(object_storage, configuration, format_settings, context); + StorageObjectStorage::resolveSchemaAndFormatFromData(object_storage, configuration, format_settings, sample_path, context); else - columns = StorageObjectStorage::resolveSchemaFromData(object_storage, configuration, format_settings, context); + columns = StorageObjectStorage::resolveSchemaFromData(object_storage, configuration, format_settings, sample_path, context); } else if (format == "auto") { - format = StorageObjectStorage::resolveFormatFromData(object_storage, configuration, format_settings, context); + format = StorageObjectStorage::resolveFormatFromData(object_storage, configuration, format_settings, sample_path, context); } if (!columns.hasOnlyOrdinary()) diff --git a/src/Storages/ObjectStorage/Utils.h b/src/Storages/ObjectStorage/Utils.h index 2077999df41..7ee14f50979 100644 --- a/src/Storages/ObjectStorage/Utils.h +++ b/src/Storages/ObjectStorage/Utils.h @@ -19,6 +19,7 @@ void resolveSchemaAndFormat( ObjectStoragePtr object_storage, const StorageObjectStorage::ConfigurationPtr & configuration, std::optional format_settings, + std::string & sample_path, const ContextPtr & context); } diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp index 4d921003e04..cde41b4afff 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp @@ -524,7 +524,7 @@ Chunk ObjectStorageQueueSource::generateImpl() { .path = path, .size = reader.getObjectInfo()->metadata->size_bytes - }); + }, getContext()); return chunk; } diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h index 0f3d0ab2e92..c085287e4f3 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h @@ -128,7 +128,7 @@ private: const std::shared_ptr file_iterator; const ConfigurationPtr configuration; const ObjectStoragePtr object_storage; - const ReadFromFormatInfo read_from_format_info; + ReadFromFormatInfo read_from_format_info; const std::optional format_settings; const ObjectStorageQueueSettings queue_settings; const std::shared_ptr files_metadata; diff --git a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp index f51a7a913b8..9452ce81e9e 100644 --- a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp +++ b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp @@ -161,14 +161,15 @@ StorageObjectStorageQueue::StorageObjectStorageQueue( configuration->check(context_); ColumnsDescription columns{columns_}; - resolveSchemaAndFormat(columns, configuration->format, object_storage, configuration, format_settings, context_); + std::string sample_path; + resolveSchemaAndFormat(columns, configuration->format, object_storage, configuration, format_settings, sample_path, context_); configuration->check(context_); StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns); storage_metadata.setConstraints(constraints_); storage_metadata.setComment(comment); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.columns, context_)); setInMemoryMetadata(storage_metadata); LOG_INFO(log, "Using zookeeper path: {}", zk_path.string()); diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp index f632e553a0d..01f78673ed8 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp @@ -659,7 +659,7 @@ void PostgreSQLReplicationHandler::dropReplicationSlot(pqxx::nontransaction & tx void PostgreSQLReplicationHandler::dropPublication(pqxx::nontransaction & tx) { - std::string query_str = fmt::format("DROP PUBLICATION IF EXISTS {}", publication_name); + std::string query_str = fmt::format("DROP PUBLICATION IF EXISTS {}", doubleQuoteString(publication_name)); tx.exec(query_str); LOG_DEBUG(log, "Dropped publication: {}", publication_name); } @@ -667,7 +667,7 @@ void PostgreSQLReplicationHandler::dropPublication(pqxx::nontransaction & tx) void PostgreSQLReplicationHandler::addTableToPublication(pqxx::nontransaction & ntx, const String & table_name) { - std::string query_str = fmt::format("ALTER PUBLICATION {} ADD TABLE ONLY {}", publication_name, doubleQuoteWithSchema(table_name)); + std::string query_str = fmt::format("ALTER PUBLICATION {} ADD TABLE ONLY {}", doubleQuoteString(publication_name), doubleQuoteWithSchema(table_name)); ntx.exec(query_str); LOG_TRACE(log, "Added table {} to publication `{}`", doubleQuoteWithSchema(table_name), publication_name); } diff --git a/src/Storages/Statistics/ConditionSelectivityEstimator.cpp b/src/Storages/Statistics/ConditionSelectivityEstimator.cpp index 57dff958b9a..432659f51f8 100644 --- a/src/Storages/Statistics/ConditionSelectivityEstimator.cpp +++ b/src/Storages/Statistics/ConditionSelectivityEstimator.cpp @@ -19,7 +19,7 @@ void ConditionSelectivityEstimator::ColumnSelectivityEstimator::merge(String par Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateLess(const Field & val, Float64 rows) const { if (part_statistics.empty()) - return default_normal_cond_factor * rows; + return default_cond_range_factor * rows; Float64 result = 0; Float64 part_rows = 0; for (const auto & [key, estimator] : part_statistics) @@ -39,13 +39,7 @@ Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateEqual { if (part_statistics.empty()) { - auto float_val = StatisticsUtils::tryConvertToFloat64(val); - if (!float_val) - return default_unknown_cond_factor * rows; - else if (float_val.value() < - threshold || float_val.value() > threshold) - return default_normal_cond_factor * rows; - else - return default_good_cond_factor * rows; + return default_cond_equal_factor * rows; } Float64 result = 0; Float64 partial_cnt = 0; @@ -149,30 +143,22 @@ Float64 ConditionSelectivityEstimator::estimateRowCount(const RPNBuilderTreeNode auto [op, val] = extractBinaryOp(node, col); + if (dummy) + { + if (op == "equals") + return default_cond_equal_factor * total_rows; + else if (op == "less" || op == "lessOrEquals" || op == "greater" || op == "greaterOrEquals") + return default_cond_range_factor * total_rows; + else + return default_unknown_cond_factor * total_rows; + } + if (op == "equals") - { - if (dummy) - { - auto float_val = StatisticsUtils::tryConvertToFloat64(val); - if (!float_val || (float_val < - threshold || float_val > threshold)) - return default_normal_cond_factor * total_rows; - else - return default_good_cond_factor * total_rows; - } return estimator.estimateEqual(val, total_rows); - } else if (op == "less" || op == "lessOrEquals") - { - if (dummy) - return default_normal_cond_factor * total_rows; return estimator.estimateLess(val, total_rows); - } else if (op == "greater" || op == "greaterOrEquals") - { - if (dummy) - return default_normal_cond_factor * total_rows; return estimator.estimateGreater(val, total_rows); - } else return default_unknown_cond_factor * total_rows; } diff --git a/src/Storages/Statistics/ConditionSelectivityEstimator.h b/src/Storages/Statistics/ConditionSelectivityEstimator.h index ce7fdd12e92..269ee9ac6cb 100644 --- a/src/Storages/Statistics/ConditionSelectivityEstimator.h +++ b/src/Storages/Statistics/ConditionSelectivityEstimator.h @@ -38,12 +38,10 @@ private: std::pair extractBinaryOp(const RPNBuilderTreeNode & node, const String & column_name) const; - static constexpr auto default_good_cond_factor = 0.1; - static constexpr auto default_normal_cond_factor = 0.5; - static constexpr auto default_unknown_cond_factor = 1.0; - /// Conditions like "x = N" are considered good if abs(N) > threshold. - /// This is used to assume that condition is likely to have good selectivity. - static constexpr auto threshold = 2; + /// Used to estimate the selectivity of a condition when there is no statistics. + static constexpr auto default_cond_range_factor = 0.5; + static constexpr auto default_cond_equal_factor = 0.01; + static constexpr auto default_unknown_cond_factor = 1; UInt64 total_rows = 0; std::map column_estimators; diff --git a/src/Storages/Statistics/Statistics.cpp b/src/Storages/Statistics/Statistics.cpp index ade3326288a..6372c804e0e 100644 --- a/src/Storages/Statistics/Statistics.cpp +++ b/src/Storages/Statistics/Statistics.cpp @@ -1,14 +1,17 @@ #include +#include +#include +#include +#include #include #include +#include #include #include #include #include #include #include -#include -#include #include "config.h" /// USE_DATASKETCHES @@ -27,33 +30,26 @@ enum StatisticsFileVersion : UInt16 V0 = 0, }; -std::optional StatisticsUtils::tryConvertToFloat64(const Field & field) +std::optional StatisticsUtils::tryConvertToFloat64(const Field & value, const DataTypePtr & data_type) { - switch (field.getType()) + if (data_type->isValueRepresentedByNumber()) { - case Field::Types::Int64: - return field.get(); - case Field::Types::UInt64: - return field.get(); - case Field::Types::Float64: - return field.get(); - case Field::Types::Int128: - return field.get(); - case Field::Types::UInt128: - return field.get(); - case Field::Types::Int256: - return field.get(); - case Field::Types::UInt256: - return field.get(); - default: - return {}; - } -} + Field value_converted; -std::optional StatisticsUtils::tryConvertToString(const DB::Field & field) -{ - if (field.getType() == Field::Types::String) - return field.get(); + if (isInteger(data_type) && (value.getType() == Field::Types::Float64 || value.getType() == Field::Types::String)) + /// For case val_int32 < 10.5 or val_int32 < '10.5' we should convert 10.5 to Float64. + value_converted = convertFieldToType(value, *DataTypeFactory::instance().get("Float64")); + else + /// We should convert value to the real column data type and then translate it to Float64. + /// For example for expression col_date > '2024-08-07', if we directly convert '2024-08-07' to Float64, we will get null. + value_converted = convertFieldToType(value, *data_type); + + if (value_converted.isNull()) + return {}; + + Float64 value_as_float = applyVisitor(FieldVisitorConvertToNumber(), value_converted); + return value_as_float; + } return {}; } @@ -62,8 +58,8 @@ IStatistics::IStatistics(const SingleStatisticsDescription & stat_) { } -ColumnStatistics::ColumnStatistics(const ColumnStatisticsDescription & stats_desc_) - : stats_desc(stats_desc_) +ColumnStatistics::ColumnStatistics(const ColumnStatisticsDescription & stats_desc_, const String & column_name_) + : stats_desc(stats_desc_), column_name(column_name_) { } @@ -89,21 +85,23 @@ Float64 IStatistics::estimateLess(const Field & /*val*/) const throw Exception(ErrorCodes::LOGICAL_ERROR, "Less-than estimation is not implemented for this type of statistics"); } -/// ------------------------------------- -/// Implementation of the estimation: -/// Note: Each statistics object supports certain types predicates natively, e.g. -/// - TDigest: '< X' (less-than predicates) -/// - Count-min sketches: '= X' (equal predicates) -/// - Uniq (HyperLogLog): 'count distinct(*)' (column cardinality) -/// If multiple statistics objects are available per column, it is sometimes also possible to combine them in a clever way. -/// For that reason, all estimation are performed in a central place (here), and we don't simply pass the predicate to the first statistics -/// object that supports it natively. +/// Notes: +/// - Statistics object usually only support estimation for certain types of predicates, e.g. +/// - TDigest: '< X' (less-than predicates) +/// - Count-min sketches: '= X' (equal predicates) +/// - Uniq (HyperLogLog): 'count distinct(*)' (column cardinality) +/// +/// If multiple statistics objects in a column support estimating a predicate, we want to try statistics in order of descending accuracy +/// (e.g. MinMax statistics are simpler than TDigest statistics and thus worse for estimating 'less' predicates). +/// +/// Sometimes, it is possible to combine multiple statistics in a clever way. For that reason, all estimation are performed in a central +/// place (here), and we don't simply pass the predicate to the first statistics object that supports it natively. Float64 ColumnStatistics::estimateLess(const Field & val) const { if (stats.contains(StatisticsType::TDigest)) return stats.at(StatisticsType::TDigest)->estimateLess(val); - return rows * ConditionSelectivityEstimator::default_normal_cond_factor; + return rows * ConditionSelectivityEstimator::default_cond_range_factor; } Float64 ColumnStatistics::estimateGreater(const Field & val) const @@ -113,8 +111,7 @@ Float64 ColumnStatistics::estimateGreater(const Field & val) const Float64 ColumnStatistics::estimateEqual(const Field & val) const { - auto float_val = StatisticsUtils::tryConvertToFloat64(val); - if (float_val.has_value() && stats.contains(StatisticsType::Uniq) && stats.contains(StatisticsType::TDigest)) + if (stats_desc.data_type->isValueRepresentedByNumber() && stats.contains(StatisticsType::Uniq) && stats.contains(StatisticsType::TDigest)) { /// 2048 is the default number of buckets in TDigest. In this case, TDigest stores exactly one value (with many rows) for every bucket. if (stats.at(StatisticsType::Uniq)->estimateCardinality() < 2048) @@ -124,10 +121,7 @@ Float64 ColumnStatistics::estimateEqual(const Field & val) const if (stats.contains(StatisticsType::CountMinSketch)) return stats.at(StatisticsType::CountMinSketch)->estimateEqual(val); #endif - if (!float_val.has_value() && (float_val < - ConditionSelectivityEstimator::threshold || float_val > ConditionSelectivityEstimator::threshold)) - return rows * ConditionSelectivityEstimator::default_normal_cond_factor; - else - return rows * ConditionSelectivityEstimator::default_good_cond_factor; + return rows * ConditionSelectivityEstimator::default_cond_equal_factor; } /// ------------------------------------- @@ -182,7 +176,7 @@ String ColumnStatistics::getFileName() const const String & ColumnStatistics::columnName() const { - return stats_desc.column_name; + return column_name; } UInt64 ColumnStatistics::rowCount() const @@ -204,15 +198,15 @@ void MergeTreeStatisticsFactory::registerValidator(StatisticsType stats_type, Va MergeTreeStatisticsFactory::MergeTreeStatisticsFactory() { - registerValidator(StatisticsType::TDigest, tdigestValidator); - registerCreator(StatisticsType::TDigest, tdigestCreator); + registerValidator(StatisticsType::TDigest, tdigestStatisticsValidator); + registerCreator(StatisticsType::TDigest, tdigestStatisticsCreator); - registerValidator(StatisticsType::Uniq, uniqValidator); - registerCreator(StatisticsType::Uniq, uniqCreator); + registerValidator(StatisticsType::Uniq, uniqStatisticsValidator); + registerCreator(StatisticsType::Uniq, uniqStatisticsCreator); #if USE_DATASKETCHES - registerValidator(StatisticsType::CountMinSketch, countMinSketchValidator); - registerCreator(StatisticsType::CountMinSketch, countMinSketchCreator); + registerValidator(StatisticsType::CountMinSketch, countMinSketchStatisticsValidator); + registerCreator(StatisticsType::CountMinSketch, countMinSketchStatisticsCreator); #endif } @@ -222,7 +216,7 @@ MergeTreeStatisticsFactory & MergeTreeStatisticsFactory::instance() return instance; } -void MergeTreeStatisticsFactory::validate(const ColumnStatisticsDescription & stats, DataTypePtr data_type) const +void MergeTreeStatisticsFactory::validate(const ColumnStatisticsDescription & stats, const DataTypePtr & data_type) const { for (const auto & [type, desc] : stats.types_to_desc) { @@ -233,15 +227,15 @@ void MergeTreeStatisticsFactory::validate(const ColumnStatisticsDescription & st } } -ColumnStatisticsPtr MergeTreeStatisticsFactory::get(const ColumnStatisticsDescription & stats) const +ColumnStatisticsPtr MergeTreeStatisticsFactory::get(const ColumnDescription & column_desc) const { - ColumnStatisticsPtr column_stat = std::make_shared(stats); - for (const auto & [type, desc] : stats.types_to_desc) + ColumnStatisticsPtr column_stat = std::make_shared(column_desc.statistics, column_desc.name); + for (const auto & [type, desc] : column_desc.statistics.types_to_desc) { auto it = creators.find(type); if (it == creators.end()) throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type '{}'. Available types: 'tdigest' 'uniq' and 'count_min'", type); - auto stat_ptr = (it->second)(desc, stats.data_type); + auto stat_ptr = (it->second)(desc, column_desc.type); column_stat->stats[type] = stat_ptr; } return column_stat; @@ -252,7 +246,7 @@ ColumnsStatistics MergeTreeStatisticsFactory::getMany(const ColumnsDescription & ColumnsStatistics result; for (const auto & col : columns) if (!col.statistics.empty()) - result.push_back(get(col.statistics)); + result.push_back(get(col)); return result; } diff --git a/src/Storages/Statistics/Statistics.h b/src/Storages/Statistics/Statistics.h index 16f0c67eabd..98666ed73df 100644 --- a/src/Storages/Statistics/Statistics.h +++ b/src/Storages/Statistics/Statistics.h @@ -14,12 +14,11 @@ namespace DB constexpr auto STATS_FILE_PREFIX = "statistics_"; constexpr auto STATS_FILE_SUFFIX = ".stats"; - struct StatisticsUtils { /// Returns std::nullopt if input Field cannot be converted to a concrete value - static std::optional tryConvertToFloat64(const Field & field); - static std::optional tryConvertToString(const Field & field); + /// - `data_type` is the type of the column on which the statistics object was build on + static std::optional tryConvertToFloat64(const Field & value, const DataTypePtr & data_type); }; /// Statistics describe properties of the values in the column, @@ -55,7 +54,7 @@ using StatisticsPtr = std::shared_ptr; class ColumnStatistics { public: - explicit ColumnStatistics(const ColumnStatisticsDescription & stats_desc_); + explicit ColumnStatistics(const ColumnStatisticsDescription & stats_desc_, const String & column_name_); void serialize(WriteBuffer & buf); void deserialize(ReadBuffer & buf); @@ -74,10 +73,12 @@ public: private: friend class MergeTreeStatisticsFactory; ColumnStatisticsDescription stats_desc; + String column_name; std::map stats; UInt64 rows = 0; /// the number of rows in the column }; +struct ColumnDescription; class ColumnsDescription; using ColumnStatisticsPtr = std::shared_ptr; using ColumnsStatistics = std::vector; @@ -87,12 +88,12 @@ class MergeTreeStatisticsFactory : private boost::noncopyable public: static MergeTreeStatisticsFactory & instance(); - void validate(const ColumnStatisticsDescription & stats, DataTypePtr data_type) const; + void validate(const ColumnStatisticsDescription & stats, const DataTypePtr & data_type) const; - using Validator = std::function; - using Creator = std::function; + using Validator = std::function; + using Creator = std::function; - ColumnStatisticsPtr get(const ColumnStatisticsDescription & stats) const; + ColumnStatisticsPtr get(const ColumnDescription & column_desc) const; ColumnsStatistics getMany(const ColumnsDescription & columns) const; void registerValidator(StatisticsType type, Validator validator); diff --git a/src/Storages/Statistics/StatisticsCountMinSketch.cpp b/src/Storages/Statistics/StatisticsCountMinSketch.cpp index e69bbc1515b..6dbd0625d3d 100644 --- a/src/Storages/Statistics/StatisticsCountMinSketch.cpp +++ b/src/Storages/Statistics/StatisticsCountMinSketch.cpp @@ -25,8 +25,8 @@ extern const int ILLEGAL_STATISTICS; static constexpr auto num_hashes = 7uz; static constexpr auto num_buckets = 2718uz; -StatisticsCountMinSketch::StatisticsCountMinSketch(const SingleStatisticsDescription & stat_, DataTypePtr data_type_) - : IStatistics(stat_) +StatisticsCountMinSketch::StatisticsCountMinSketch(const SingleStatisticsDescription & description, const DataTypePtr & data_type_) + : IStatistics(description) , sketch(num_hashes, num_buckets) , data_type(data_type_) { @@ -48,7 +48,7 @@ Float64 StatisticsCountMinSketch::estimateEqual(const Field & val) const return sketch.get_estimate(&val_converted, data_type->getSizeOfValueInMemory()); if (isStringOrFixedString(data_type)) - return sketch.get_estimate(val.get()); + return sketch.get_estimate(val.safeGet()); throw Exception(ErrorCodes::LOGICAL_ERROR, "Statistics 'count_min' does not support estimate data type of {}", data_type->getName()); } @@ -84,17 +84,17 @@ void StatisticsCountMinSketch::deserialize(ReadBuffer & buf) } -void countMinSketchValidator(const SingleStatisticsDescription &, DataTypePtr data_type) +void countMinSketchStatisticsValidator(const SingleStatisticsDescription & /*description*/, const DataTypePtr & data_type) { - data_type = removeNullable(data_type); - data_type = removeLowCardinalityAndNullable(data_type); - if (!data_type->isValueRepresentedByNumber() && !isStringOrFixedString(data_type)) + DataTypePtr inner_data_type = removeNullable(data_type); + inner_data_type = removeLowCardinalityAndNullable(inner_data_type); + if (!inner_data_type->isValueRepresentedByNumber() && !isStringOrFixedString(inner_data_type)) throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'count_min' does not support type {}", data_type->getName()); } -StatisticsPtr countMinSketchCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type) +StatisticsPtr countMinSketchStatisticsCreator(const SingleStatisticsDescription & description, const DataTypePtr & data_type) { - return std::make_shared(stat, data_type); + return std::make_shared(description, data_type); } } diff --git a/src/Storages/Statistics/StatisticsCountMinSketch.h b/src/Storages/Statistics/StatisticsCountMinSketch.h index 6c8b74f8c35..d1de1a3aea5 100644 --- a/src/Storages/Statistics/StatisticsCountMinSketch.h +++ b/src/Storages/Statistics/StatisticsCountMinSketch.h @@ -14,7 +14,7 @@ namespace DB class StatisticsCountMinSketch : public IStatistics { public: - StatisticsCountMinSketch(const SingleStatisticsDescription & stat_, DataTypePtr data_type_); + StatisticsCountMinSketch(const SingleStatisticsDescription & description, const DataTypePtr & data_type_); Float64 estimateEqual(const Field & val) const override; @@ -31,8 +31,8 @@ private: }; -void countMinSketchValidator(const SingleStatisticsDescription &, DataTypePtr data_type); -StatisticsPtr countMinSketchCreator(const SingleStatisticsDescription & stat, DataTypePtr); +void countMinSketchStatisticsValidator(const SingleStatisticsDescription & description, const DataTypePtr & data_type); +StatisticsPtr countMinSketchStatisticsCreator(const SingleStatisticsDescription & description, const DataTypePtr & data_type); } diff --git a/src/Storages/Statistics/StatisticsTDigest.cpp b/src/Storages/Statistics/StatisticsTDigest.cpp index 66150e00fdb..285b779036f 100644 --- a/src/Storages/Statistics/StatisticsTDigest.cpp +++ b/src/Storages/Statistics/StatisticsTDigest.cpp @@ -1,33 +1,29 @@ #include -#include #include +#include namespace DB { namespace ErrorCodes { extern const int ILLEGAL_STATISTICS; -extern const int LOGICAL_ERROR; } -StatisticsTDigest::StatisticsTDigest(const SingleStatisticsDescription & stat_) - : IStatistics(stat_) +StatisticsTDigest::StatisticsTDigest(const SingleStatisticsDescription & description, const DataTypePtr & data_type_) + : IStatistics(description) + , data_type(data_type_) { } void StatisticsTDigest::update(const ColumnPtr & column) { - size_t rows = column->size(); - for (size_t row = 0; row < rows; ++row) + for (size_t row = 0; row < column->size(); ++row) { - Field field; - column->get(row, field); - - if (field.isNull()) + if (column->isNullAt(row)) continue; - if (auto field_as_float = StatisticsUtils::tryConvertToFloat64(field)) - t_digest.add(*field_as_float, 1); + auto data = column->getFloat64(row); + t_digest.add(data, 1); } } @@ -43,31 +39,31 @@ void StatisticsTDigest::deserialize(ReadBuffer & buf) Float64 StatisticsTDigest::estimateLess(const Field & val) const { - auto val_as_float = StatisticsUtils::tryConvertToFloat64(val); - if (val_as_float) - return t_digest.getCountLessThan(*val_as_float); - throw Exception(ErrorCodes::LOGICAL_ERROR, "Statistics 'tdigest' does not support estimating value of type {}", val.getTypeName()); + auto val_as_float = StatisticsUtils::tryConvertToFloat64(val, data_type); + if (!val_as_float.has_value()) + return 0; + return t_digest.getCountLessThan(*val_as_float); } Float64 StatisticsTDigest::estimateEqual(const Field & val) const { - auto val_as_float = StatisticsUtils::tryConvertToFloat64(val); - if (val_as_float) - return t_digest.getCountEqual(*val_as_float); - throw Exception(ErrorCodes::LOGICAL_ERROR, "Statistics 'tdigest' does not support estimating value of type {}", val.getTypeName()); + auto val_as_float = StatisticsUtils::tryConvertToFloat64(val, data_type); + if (!val_as_float.has_value()) + return 0; + return t_digest.getCountEqual(*val_as_float); } -void tdigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type) +void tdigestStatisticsValidator(const SingleStatisticsDescription & /*description*/, const DataTypePtr & data_type) { - data_type = removeNullable(data_type); - data_type = removeLowCardinalityAndNullable(data_type); - if (!data_type->isValueRepresentedByNumber()) + DataTypePtr inner_data_type = removeNullable(data_type); + inner_data_type = removeLowCardinalityAndNullable(inner_data_type); + if (!inner_data_type->isValueRepresentedByNumber()) throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'tdigest' do not support type {}", data_type->getName()); } -StatisticsPtr tdigestCreator(const SingleStatisticsDescription & stat, DataTypePtr) +StatisticsPtr tdigestStatisticsCreator(const SingleStatisticsDescription & description, const DataTypePtr & data_type) { - return std::make_shared(stat); + return std::make_shared(description, data_type); } } diff --git a/src/Storages/Statistics/StatisticsTDigest.h b/src/Storages/Statistics/StatisticsTDigest.h index 614973e5d8b..5e744fee2ce 100644 --- a/src/Storages/Statistics/StatisticsTDigest.h +++ b/src/Storages/Statistics/StatisticsTDigest.h @@ -9,7 +9,7 @@ namespace DB class StatisticsTDigest : public IStatistics { public: - explicit StatisticsTDigest(const SingleStatisticsDescription & stat_); + explicit StatisticsTDigest(const SingleStatisticsDescription & description, const DataTypePtr & data_type_); void update(const ColumnPtr & column) override; @@ -21,9 +21,10 @@ public: private: QuantileTDigest t_digest; + DataTypePtr data_type; }; -void tdigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type); -StatisticsPtr tdigestCreator(const SingleStatisticsDescription & stat, DataTypePtr); +void tdigestStatisticsValidator(const SingleStatisticsDescription & description, const DataTypePtr & data_type); +StatisticsPtr tdigestStatisticsCreator(const SingleStatisticsDescription & description, const DataTypePtr & data_type); } diff --git a/src/Storages/Statistics/StatisticsUniq.cpp b/src/Storages/Statistics/StatisticsUniq.cpp index 8f60ffcf0b5..07311b5b86d 100644 --- a/src/Storages/Statistics/StatisticsUniq.cpp +++ b/src/Storages/Statistics/StatisticsUniq.cpp @@ -11,8 +11,8 @@ namespace ErrorCodes extern const int ILLEGAL_STATISTICS; } -StatisticsUniq::StatisticsUniq(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type) - : IStatistics(stat_) +StatisticsUniq::StatisticsUniq(const SingleStatisticsDescription & description, const DataTypePtr & data_type) + : IStatistics(description) { arena = std::make_unique(); AggregateFunctionProperties properties; @@ -52,17 +52,17 @@ UInt64 StatisticsUniq::estimateCardinality() const return column->getUInt(0); } -void uniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type) +void uniqStatisticsValidator(const SingleStatisticsDescription & /*description*/, const DataTypePtr & data_type) { - data_type = removeNullable(data_type); - data_type = removeLowCardinalityAndNullable(data_type); - if (!data_type->isValueRepresentedByNumber()) + DataTypePtr inner_data_type = removeNullable(data_type); + inner_data_type = removeLowCardinalityAndNullable(inner_data_type); + if (!inner_data_type->isValueRepresentedByNumber()) throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'uniq' do not support type {}", data_type->getName()); } -StatisticsPtr uniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type) +StatisticsPtr uniqStatisticsCreator(const SingleStatisticsDescription & description, const DataTypePtr & data_type) { - return std::make_shared(stat, data_type); + return std::make_shared(description, data_type); } } diff --git a/src/Storages/Statistics/StatisticsUniq.h b/src/Storages/Statistics/StatisticsUniq.h index faabde8d47c..1fdcab8bd89 100644 --- a/src/Storages/Statistics/StatisticsUniq.h +++ b/src/Storages/Statistics/StatisticsUniq.h @@ -10,7 +10,7 @@ namespace DB class StatisticsUniq : public IStatistics { public: - StatisticsUniq(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type); + StatisticsUniq(const SingleStatisticsDescription & description, const DataTypePtr & data_type); ~StatisticsUniq() override; void update(const ColumnPtr & column) override; @@ -27,7 +27,7 @@ private: }; -void uniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type); -StatisticsPtr uniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type); +void uniqStatisticsValidator(const SingleStatisticsDescription & description, const DataTypePtr & data_type); +StatisticsPtr uniqStatisticsCreator(const SingleStatisticsDescription & description, const DataTypePtr & data_type); } diff --git a/src/Storages/StatisticsDescription.cpp b/src/Storages/StatisticsDescription.cpp index 63c849e3806..64634124758 100644 --- a/src/Storages/StatisticsDescription.cpp +++ b/src/Storages/StatisticsDescription.cpp @@ -6,7 +6,6 @@ #include #include #include -#include #include @@ -97,16 +96,13 @@ void ColumnStatisticsDescription::merge(const ColumnStatisticsDescription & othe { chassert(merging_column_type); - if (column_name.empty()) - column_name = merging_column_name; - data_type = merging_column_type; for (const auto & [stats_type, stats_desc]: other.types_to_desc) { if (!if_not_exists && types_to_desc.contains(stats_type)) { - throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics type name {} has existed in column {}", stats_type, column_name); + throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics type name {} has existed in column {}", stats_type, merging_column_name); } else if (!types_to_desc.contains(stats_type)) types_to_desc.emplace(stats_type, stats_desc); @@ -115,9 +111,6 @@ void ColumnStatisticsDescription::merge(const ColumnStatisticsDescription & othe void ColumnStatisticsDescription::assign(const ColumnStatisticsDescription & other) { - if (other.column_name != column_name) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot assign statistics from column {} to {}", column_name, other.column_name); - types_to_desc = other.types_to_desc; data_type = other.data_type; } @@ -127,7 +120,7 @@ void ColumnStatisticsDescription::clear() types_to_desc.clear(); } -std::vector ColumnStatisticsDescription::fromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns) +std::vector> ColumnStatisticsDescription::fromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns) { const auto * stat_definition_ast = definition_ast->as(); if (!stat_definition_ast) @@ -145,7 +138,7 @@ std::vector ColumnStatisticsDescription::fromAST(co statistics_types.emplace(stat.type, stat); } - std::vector result; + std::vector> result; result.reserve(stat_definition_ast->columns->children.size()); for (const auto & column_ast : stat_definition_ast->columns->children) @@ -157,10 +150,9 @@ std::vector ColumnStatisticsDescription::fromAST(co throw Exception(ErrorCodes::INCORRECT_QUERY, "Incorrect column name {}", physical_column_name); const auto & column = columns.getPhysical(physical_column_name); - stats.column_name = column.name; stats.data_type = column.type; stats.types_to_desc = statistics_types; - result.push_back(stats); + result.emplace_back(physical_column_name, stats); } if (result.empty()) @@ -175,14 +167,13 @@ ColumnStatisticsDescription ColumnStatisticsDescription::fromColumnDeclaration(c if (stat_type_list_ast->children.empty()) throw Exception(ErrorCodes::INCORRECT_QUERY, "We expect at least one statistics type for column {}", queryToString(column)); ColumnStatisticsDescription stats; - stats.column_name = column.name; for (const auto & ast : stat_type_list_ast->children) { const auto & stat_type = ast->as().name; SingleStatisticsDescription stat(stringToStatisticsType(Poco::toLower(stat_type)), ast->clone()); if (stats.types_to_desc.contains(stat.type)) - throw Exception(ErrorCodes::INCORRECT_QUERY, "Column {} already contains statistics type {}", stats.column_name, stat_type); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Column {} already contains statistics type {}", column.name, stat_type); stats.types_to_desc.emplace(stat.type, std::move(stat)); } stats.data_type = data_type; diff --git a/src/Storages/StatisticsDescription.h b/src/Storages/StatisticsDescription.h index 03b8fb0d583..46927f1418c 100644 --- a/src/Storages/StatisticsDescription.h +++ b/src/Storages/StatisticsDescription.h @@ -55,12 +55,12 @@ struct ColumnStatisticsDescription ASTPtr getAST() const; - static std::vector fromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns); + /// get a vector of pair + static std::vector> fromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns); static ColumnStatisticsDescription fromColumnDeclaration(const ASTColumnDeclaration & column, DataTypePtr data_type); using StatisticsTypeDescMap = std::map; StatisticsTypeDescMap types_to_desc; - String column_name; DataTypePtr data_type; }; diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index e146e95f89f..0b80858800b 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -290,6 +290,10 @@ VirtualColumnsDescription StorageDistributed::createVirtuals() desc.addEphemeral("_shard_num", std::make_shared(), "Deprecated. Use function shardNum instead"); + /// Add virtual columns from table with Merge engine. + desc.addEphemeral("_database", std::make_shared(std::make_shared()), "The name of database which the row comes from"); + desc.addEphemeral("_table", std::make_shared(std::make_shared()), "The name of table which the row comes from"); + return desc; } diff --git a/src/Storages/StorageExternalDistributed.cpp b/src/Storages/StorageExternalDistributed.cpp index 951c87807bb..9fc8b588c89 100644 --- a/src/Storages/StorageExternalDistributed.cpp +++ b/src/Storages/StorageExternalDistributed.cpp @@ -1,4 +1,4 @@ -#include "StorageExternalDistributed.h" +#include #include #include @@ -6,6 +6,8 @@ #include #include #include +#include +#include #include #include #include @@ -112,14 +114,39 @@ void registerStorageExternalDistributed(StorageFactory & factory) std::unordered_set shards; ASTs inner_engine_args(engine_args.begin() + 1, engine_args.end()); + ASTPtr * address_arg = nullptr; + + /// If there is a named collection argument, named `addresses_expr` + for (auto & node : inner_engine_args) + { + if (ASTFunction * func = node->as(); func && func->name == "equals" && func->arguments) + { + if (ASTExpressionList * func_args = func->arguments->as(); func_args && func_args->children.size() == 2) + { + if (ASTIdentifier * arg_name = func_args->children[0]->as(); arg_name && arg_name->name() == "addresses_expr") + { + address_arg = &func_args->children[1]; + break; + } + } + } + } + + /// Otherwise it is the first argument. + if (!address_arg) + address_arg = &inner_engine_args.at(0); + + String addresses_expr = checkAndGetLiteralArgument(*address_arg, "addresses"); + Strings shards_addresses = get_addresses(addresses_expr); + auto engine_name = checkAndGetLiteralArgument(engine_args[0], "engine_name"); if (engine_name == "URL") { - auto configuration = StorageURL::getConfiguration(inner_engine_args, context); - auto shards_addresses = get_addresses(configuration.addresses_expr); auto format_settings = StorageURL::getFormatSettingsFromArgs(args); for (const auto & shard_address : shards_addresses) { + *address_arg = std::make_shared(shard_address); + auto configuration = StorageURL::getConfiguration(inner_engine_args, context); auto uri_options = parseRemoteDescription(shard_address, 0, shard_address.size(), '|', max_addresses); if (uri_options.size() > 1) { @@ -140,13 +167,12 @@ void registerStorageExternalDistributed(StorageFactory & factory) else if (engine_name == "MySQL") { MySQLSettings mysql_settings; - auto configuration = StorageMySQL::getConfiguration(inner_engine_args, context, mysql_settings); - auto shards_addresses = get_addresses(configuration.addresses_expr); for (const auto & shard_address : shards_addresses) { - auto current_configuration{configuration}; - current_configuration.addresses = parseRemoteDescriptionForExternalDatabase(shard_address, max_addresses, 3306); - auto pool = createMySQLPoolWithFailover(current_configuration, mysql_settings); + *address_arg = std::make_shared(shard_address); + auto configuration = StorageMySQL::getConfiguration(inner_engine_args, context, mysql_settings); + configuration.addresses = parseRemoteDescriptionForExternalDatabase(shard_address, max_addresses, 3306); + auto pool = createMySQLPoolWithFailover(configuration, mysql_settings); shards.insert(std::make_shared( args.table_id, std::move(pool), configuration.database, configuration.table, /* replace_query = */ false, /* on_duplicate_clause = */ "", @@ -157,14 +183,13 @@ void registerStorageExternalDistributed(StorageFactory & factory) #if USE_LIBPQXX else if (engine_name == "PostgreSQL") { - auto configuration = StoragePostgreSQL::getConfiguration(inner_engine_args, context); - auto shards_addresses = get_addresses(configuration.addresses_expr); for (const auto & shard_address : shards_addresses) { - auto current_configuration{configuration}; - current_configuration.addresses = parseRemoteDescriptionForExternalDatabase(shard_address, max_addresses, 5432); + *address_arg = std::make_shared(shard_address); + auto configuration = StoragePostgreSQL::getConfiguration(inner_engine_args, context); + configuration.addresses = parseRemoteDescriptionForExternalDatabase(shard_address, max_addresses, 5432); auto pool = std::make_shared( - current_configuration, + configuration, settings.postgresql_connection_pool_size, settings.postgresql_connection_pool_wait_timeout, settings.postgresql_connection_pool_retries, diff --git a/src/Storages/StorageExternalDistributed.h b/src/Storages/StorageExternalDistributed.h index c4d37c3e5cc..56c7fe86f34 100644 --- a/src/Storages/StorageExternalDistributed.h +++ b/src/Storages/StorageExternalDistributed.h @@ -8,8 +8,6 @@ namespace DB { -struct ExternalDataSourceConfiguration; - /// Storages MySQL and PostgreSQL use ConnectionPoolWithFailover and support multiple replicas. /// This class unites multiple storages with replicas into multiple shards with replicas. /// A query to external database is passed to one replica on each shard, the result is united. diff --git a/src/Storages/StorageFactory.cpp b/src/Storages/StorageFactory.cpp index 060b271d8f4..b95ccedb093 100644 --- a/src/Storages/StorageFactory.cpp +++ b/src/Storages/StorageFactory.cpp @@ -203,7 +203,7 @@ StoragePtr StorageFactory::get( } if (query.comment) - comment = query.comment->as().value.get(); + comment = query.comment->as().value.safeGet(); ASTs empty_engine_args; Arguments arguments{ diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 324c88d643d..50294df32a4 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -53,6 +53,7 @@ #include #include #include +#include #include "base/defines.h" #include @@ -516,7 +517,7 @@ namespace StorageFile::getSchemaCache(getContext()).addManyColumns(cache_keys, columns); } - String getLastFileName() const override + String getLastFilePath() const override { if (current_index != 0) return paths[current_index - 1]; @@ -793,7 +794,7 @@ namespace format = format_name; } - String getLastFileName() const override + String getLastFilePath() const override { return last_read_file_path; } @@ -1111,8 +1112,9 @@ void StorageFile::setStorageMetadata(CommonArguments args) storage_metadata.setConstraints(args.constraints); storage_metadata.setComment(args.comment); + + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.columns, args.getContext(), paths.empty() ? "" : paths[0], format_settings)); setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); } @@ -1466,7 +1468,7 @@ Chunk StorageFileSource::generate() .size = current_file_size, .filename = (filename_override.has_value() ? &filename_override.value() : nullptr), .last_modified = current_file_last_modified - }); + }, getContext()); return chunk; } @@ -2185,12 +2187,12 @@ void registerStorageFile(StorageFactory & factory) { auto type = literal->value.getType(); if (type == Field::Types::Int64) - source_fd = static_cast(literal->value.get()); + source_fd = static_cast(literal->value.safeGet()); else if (type == Field::Types::UInt64) - source_fd = static_cast(literal->value.get()); + source_fd = static_cast(literal->value.safeGet()); else if (type == Field::Types::String) StorageFile::parseFileSource( - literal->value.get(), + literal->value.safeGet(), source_path, storage_args.path_to_archive, factory_args.getLocalContext()->getSettingsRef().allow_archive_path_syntax); diff --git a/src/Storages/StorageFileCluster.cpp b/src/Storages/StorageFileCluster.cpp index d43e242f70c..c01738067c4 100644 --- a/src/Storages/StorageFileCluster.cpp +++ b/src/Storages/StorageFileCluster.cpp @@ -60,8 +60,8 @@ StorageFileCluster::StorageFileCluster( } storage_metadata.setConstraints(constraints_); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.columns, context, paths.empty() ? "" : paths[0])); setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); } void StorageFileCluster::updateQueryToSendIfNeeded(DB::ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const DB::ContextPtr & context) diff --git a/src/Storages/StorageFuzzJSON.cpp b/src/Storages/StorageFuzzJSON.cpp index 9950d41f1c2..fc73f246d35 100644 --- a/src/Storages/StorageFuzzJSON.cpp +++ b/src/Storages/StorageFuzzJSON.cpp @@ -419,7 +419,7 @@ void fuzzJSONObject( if (val.fixed->getType() == Field::Types::Which::String) { out << fuzzJSONStructure(config, rnd, "\""); - writeText(val.fixed->get(), out); + writeText(val.fixed->safeGet(), out); out << fuzzJSONStructure(config, rnd, "\""); } else diff --git a/src/Storages/StorageFuzzQuery.cpp b/src/Storages/StorageFuzzQuery.cpp new file mode 100644 index 00000000000..6e8f425f8dc --- /dev/null +++ b/src/Storages/StorageFuzzQuery.cpp @@ -0,0 +1,169 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +ColumnPtr FuzzQuerySource::createColumn() +{ + auto column = ColumnString::create(); + ColumnString::Chars & data_to = column->getChars(); + ColumnString::Offsets & offsets_to = column->getOffsets(); + + offsets_to.resize(block_size); + IColumn::Offset offset = 0; + + auto fuzz_base = query; + size_t row_num = 0; + + while (row_num < block_size) + { + ASTPtr new_query = fuzz_base->clone(); + + auto base_before_fuzz = fuzz_base->formatForErrorMessage(); + fuzzer.fuzzMain(new_query); + auto fuzzed_text = new_query->formatForErrorMessage(); + + if (base_before_fuzz == fuzzed_text) + continue; + + /// AST is too long, will start from the original query. + if (config.max_query_length > 500) + { + fuzz_base = query; + continue; + } + + IColumn::Offset next_offset = offset + fuzzed_text.size() + 1; + data_to.resize(next_offset); + + std::copy(fuzzed_text.begin(), fuzzed_text.end(), &data_to[offset]); + + data_to[offset + fuzzed_text.size()] = 0; + offsets_to[row_num] = next_offset; + + offset = next_offset; + fuzz_base = new_query; + ++row_num; + } + + return column; +} + +StorageFuzzQuery::StorageFuzzQuery( + const StorageID & table_id_, const ColumnsDescription & columns_, const String & comment_, const Configuration & config_) + : IStorage(table_id_), config(config_) +{ + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(columns_); + storage_metadata.setComment(comment_); + setInMemoryMetadata(storage_metadata); +} + +Pipe StorageFuzzQuery::read( + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & /*query_info*/, + ContextPtr /*context*/, + QueryProcessingStage::Enum /*processed_stage*/, + size_t max_block_size, + size_t num_streams) +{ + storage_snapshot->check(column_names); + + Pipes pipes; + pipes.reserve(num_streams); + + const ColumnsDescription & our_columns = storage_snapshot->metadata->getColumns(); + Block block_header; + for (const auto & name : column_names) + { + const auto & name_type = our_columns.get(name); + MutableColumnPtr column = name_type.type->createColumn(); + block_header.insert({std::move(column), name_type.type, name_type.name}); + } + + const char * begin = config.query.data(); + const char * end = begin + config.query.size(); + + ParserQuery parser(end, false); + auto query = parseQuery(parser, begin, end, "", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); + + for (UInt64 i = 0; i < num_streams; ++i) + pipes.emplace_back(std::make_shared(max_block_size, block_header, config, query)); + + return Pipe::unitePipes(std::move(pipes)); +} + +StorageFuzzQuery::Configuration StorageFuzzQuery::getConfiguration(ASTs & engine_args, ContextPtr local_context) +{ + StorageFuzzQuery::Configuration configuration{}; + + // Supported signatures: + // + // FuzzQuery(query) + // FuzzQuery(query, max_query_length) + // FuzzQuery(query, max_query_length, random_seed) + if (engine_args.empty() || engine_args.size() > 3) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "FuzzQuery requires 1 to 3 arguments: query, max_query_length, random_seed"); + + for (auto & engine_arg : engine_args) + engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context); + + auto first_arg = checkAndGetLiteralArgument(engine_args[0], "query"); + configuration.query = std::move(first_arg); + + if (engine_args.size() >= 2) + { + const auto & literal = engine_args[1]->as(); + if (!literal.value.isNull()) + configuration.max_query_length = checkAndGetLiteralArgument(literal, "max_query_length"); + } + + if (engine_args.size() == 3) + { + const auto & literal = engine_args[2]->as(); + if (!literal.value.isNull()) + configuration.random_seed = checkAndGetLiteralArgument(literal, "random_seed"); + } + + return configuration; +} + +void registerStorageFuzzQuery(StorageFactory & factory) +{ + factory.registerStorage( + "FuzzQuery", + [](const StorageFactory::Arguments & args) -> std::shared_ptr + { + ASTs & engine_args = args.engine_args; + + if (engine_args.empty()) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Storage FuzzQuery must have arguments."); + + StorageFuzzQuery::Configuration configuration = StorageFuzzQuery::getConfiguration(engine_args, args.getLocalContext()); + + for (const auto& col : args.columns) + if (col.type->getTypeId() != TypeIndex::String) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "'StorageFuzzQuery' supports only columns of String type, got {}.", col.type->getName()); + + return std::make_shared(args.table_id, args.columns, args.comment, configuration); + }); +} + +} diff --git a/src/Storages/StorageFuzzQuery.h b/src/Storages/StorageFuzzQuery.h new file mode 100644 index 00000000000..125ef960e74 --- /dev/null +++ b/src/Storages/StorageFuzzQuery.h @@ -0,0 +1,88 @@ +#pragma once + +#include +#include +#include +#include + +#include "config.h" + +namespace DB +{ + +class NamedCollection; + +class StorageFuzzQuery final : public IStorage +{ +public: + struct Configuration : public StatelessTableEngineConfiguration + { + String query; + UInt64 max_query_length = 500; + UInt64 random_seed = randomSeed(); + }; + + StorageFuzzQuery( + const StorageID & table_id_, const ColumnsDescription & columns_, const String & comment_, const Configuration & config_); + + std::string getName() const override { return "FuzzQuery"; } + + Pipe read( + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + size_t num_streams) override; + + static StorageFuzzQuery::Configuration getConfiguration(ASTs & engine_args, ContextPtr local_context); + +private: + const Configuration config; +}; + + +class FuzzQuerySource : public ISource +{ +public: + FuzzQuerySource( + UInt64 block_size_, Block block_header_, const StorageFuzzQuery::Configuration & config_, ASTPtr query_) + : ISource(block_header_) + , block_size(block_size_) + , block_header(std::move(block_header_)) + , config(config_) + , query(query_) + , fuzzer(config_.random_seed) + { + } + + String getName() const override { return "FuzzQuery"; } + +protected: + Chunk generate() override + { + Columns columns; + columns.reserve(block_header.columns()); + for (const auto & col : block_header) + { + chassert(col.type->getTypeId() == TypeIndex::String); + columns.emplace_back(createColumn()); + } + + return {std::move(columns), block_size}; + } + +private: + ColumnPtr createColumn(); + + UInt64 block_size; + Block block_header; + + StorageFuzzQuery::Configuration config; + ASTPtr query; + + QueryFuzzer fuzzer; +}; + +} diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index 2226de3e64f..4a655cac566 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -16,6 +16,7 @@ #include #include #include +#include namespace DB @@ -334,10 +335,17 @@ ColumnDependencies StorageInMemoryMetadata::getColumnDependencies( NameSet required_ttl_columns; NameSet updated_ttl_columns; - auto add_dependent_columns = [&updated_columns](const Names & required_columns, auto & to_set) + auto add_dependent_columns = [&updated_columns](const Names & required_columns, auto & to_set, bool is_projection = false) { for (const auto & dependency : required_columns) { + /// useful in the case of lightweight delete with wide part and option of rebuild projection + if (is_projection && updated_columns.contains(RowExistsColumn::name)) + { + to_set.insert(required_columns.begin(), required_columns.end()); + return true; + } + if (updated_columns.contains(dependency)) { to_set.insert(required_columns.begin(), required_columns.end()); @@ -357,7 +365,7 @@ ColumnDependencies StorageInMemoryMetadata::getColumnDependencies( for (const auto & projection : getProjections()) { if (has_dependency(projection.name, ColumnDependency::PROJECTION)) - add_dependent_columns(projection.getRequiredColumns(), projections_columns); + add_dependent_columns(projection.getRequiredColumns(), projections_columns, true); } auto add_for_rows_ttl = [&](const auto & expression, auto & to_set) diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index 9dace45d2ac..efa15c382dd 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -383,10 +383,10 @@ void registerStorageJoin(StorageFactory & factory) else if (setting.name == "any_join_distinct_right_table_keys") old_any_join = setting.value; else if (setting.name == "disk") - disk_name = setting.value.get(); + disk_name = setting.value.safeGet(); else if (setting.name == "persistent") { - persistent = setting.value.get(); + persistent = setting.value.safeGet(); } else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown setting {} for storage {}", setting.name, args.engine_name); @@ -546,7 +546,11 @@ protected: return {}; Chunk chunk; - if (!joinDispatch(join->kind, join->strictness, join->data->maps.front(), + if (!joinDispatch( + join->kind, + join->strictness, + join->data->maps.front(), + join->table_join->getMixedJoinExpression() != nullptr, [&](auto kind, auto strictness, auto & map) { chunk = createChunk(map); })) throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown JOIN strictness"); return chunk; diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 696136834d4..e1256032493 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -3,6 +3,8 @@ #include #include +#include +#include #include #include @@ -14,6 +16,7 @@ #include #include #include +#include #include #include @@ -146,6 +149,13 @@ StorageMaterializedView::StorageMaterializedView( if (point_to_itself_by_uuid || point_to_itself_by_name) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Materialized view {} cannot point to itself", table_id_.getFullTableName()); + if (query.refresh_strategy) + { + fixed_uuid = false; + refresher = RefreshTask::create(this, getContext(), *query.refresh_strategy); + refresh_on_start = mode < LoadingStrictnessLevel::ATTACH && !query.is_create_empty; + } + if (!has_inner_table) { target_table_id = to_table_id; @@ -198,16 +208,6 @@ StorageMaterializedView::StorageMaterializedView( target_table_id = DatabaseCatalog::instance().getTable({manual_create_query->getDatabase(), manual_create_query->getTable()}, getContext())->getStorageID(); } - - if (query.refresh_strategy) - { - fixed_uuid = false; - refresher = RefreshTask::create( - *this, - getContext(), - *query.refresh_strategy); - refresh_on_start = mode < LoadingStrictnessLevel::ATTACH && !query.is_create_empty; - } } QueryProcessingStage::Enum StorageMaterializedView::getQueryProcessingStage( @@ -378,44 +378,71 @@ bool StorageMaterializedView::optimize( return storage_ptr->optimize(query, metadata_snapshot, partition, final, deduplicate, deduplicate_by_columns, cleanup, local_context); } -std::tuple> StorageMaterializedView::prepareRefresh() const +ContextMutablePtr StorageMaterializedView::createRefreshContext() const { auto refresh_context = getInMemoryMetadataPtr()->getSQLSecurityOverriddenContext(getContext()); + refresh_context->setQueryKind(ClientInfo::QueryKind::INITIAL_QUERY); /// Generate a random query id. refresh_context->setCurrentQueryId(""); + /// TODO: Set view's definer as the current user in refresh_context, so that the correct user's + /// quotas and permissions apply for this query. + return refresh_context; +} - CurrentThread::QueryScope query_scope(refresh_context); - +std::shared_ptr StorageMaterializedView::prepareRefresh(bool append, ContextMutablePtr refresh_context, std::optional & out_temp_table_id) const +{ auto inner_table_id = getTargetTableId(); - auto new_table_name = ".tmp" + generateInnerTableName(getStorageID()); + StorageID target_table = inner_table_id; - auto db = DatabaseCatalog::instance().getDatabase(inner_table_id.database_name); + if (!append) + { + CurrentThread::QueryScope query_scope(refresh_context); - auto create_table_query = db->getCreateTableQuery(inner_table_id.table_name, getContext()); - auto & create_query = create_table_query->as(); - create_query.setTable(new_table_name); - create_query.setDatabase(db->getDatabaseName()); - create_query.create_or_replace = true; - create_query.replace_table = true; - create_query.uuid = UUIDHelpers::Nil; + auto db = DatabaseCatalog::instance().getDatabase(inner_table_id.database_name); + String db_name = db->getDatabaseName(); + auto new_table_name = ".tmp" + generateInnerTableName(getStorageID()); - InterpreterCreateQuery create_interpreter(create_table_query, refresh_context); - create_interpreter.setInternal(true); - create_interpreter.execute(); + auto create_table_query = db->getCreateTableQuery(inner_table_id.table_name, getContext()); + auto & create_query = create_table_query->as(); + create_query.setTable(new_table_name); + create_query.setDatabase(db->getDatabaseName()); + create_query.create_or_replace = true; + create_query.replace_table = true; + create_query.uuid = UUIDHelpers::Nil; - StorageID fresh_table = DatabaseCatalog::instance().getTable({create_query.getDatabase(), create_query.getTable()}, getContext())->getStorageID(); + InterpreterCreateQuery create_interpreter(create_table_query, refresh_context); + create_interpreter.setInternal(true); + create_interpreter.execute(); + + target_table = DatabaseCatalog::instance().getTable({db_name, new_table_name}, getContext())->getStorageID(); + out_temp_table_id = target_table; + } auto insert_query = std::make_shared(); insert_query->select = getInMemoryMetadataPtr()->getSelectQuery().select_query; - insert_query->setTable(fresh_table.table_name); - insert_query->setDatabase(fresh_table.database_name); - insert_query->table_id = fresh_table; + insert_query->setTable(target_table.table_name); + insert_query->setDatabase(target_table.database_name); + insert_query->table_id = target_table; - return {refresh_context, insert_query}; + Block header; + if (refresh_context->getSettingsRef().allow_experimental_analyzer) + header = InterpreterSelectQueryAnalyzer::getSampleBlock(insert_query->select, refresh_context); + else + header = InterpreterSelectWithUnionQuery(insert_query->select, refresh_context, SelectQueryOptions()).getSampleBlock(); + + auto columns = std::make_shared(','); + for (const String & name : header.getNames()) + columns->children.push_back(std::make_shared(name)); + insert_query->columns = std::move(columns); + + return insert_query; } StorageID StorageMaterializedView::exchangeTargetTable(StorageID fresh_table, ContextPtr refresh_context) { + /// Known problem: if the target table was ALTERed during refresh, this will effectively revert + /// the ALTER. + auto stale_table_id = getTargetTableId(); auto db = DatabaseCatalog::instance().getDatabase(stale_table_id.database_name); @@ -423,15 +450,40 @@ StorageID StorageMaterializedView::exchangeTargetTable(StorageID fresh_table, Co CurrentThread::QueryScope query_scope(refresh_context); - target_db->renameTable( - refresh_context, fresh_table.table_name, *db, stale_table_id.table_name, /*exchange=*/true, /*dictionary=*/false); + auto rename_query = std::make_shared(); + rename_query->exchange = true; + rename_query->addElement(fresh_table.database_name, fresh_table.table_name, stale_table_id.database_name, stale_table_id.table_name); + + InterpreterRenameQuery(rename_query, refresh_context).execute(); std::swap(stale_table_id.database_name, fresh_table.database_name); std::swap(stale_table_id.table_name, fresh_table.table_name); + setTargetTableId(std::move(fresh_table)); return stale_table_id; } +void StorageMaterializedView::dropTempTable(StorageID table_id, ContextMutablePtr refresh_context) +{ + CurrentThread::QueryScope query_scope(refresh_context); + + try + { + auto drop_query = std::make_shared(); + drop_query->setDatabase(table_id.database_name); + drop_query->setTable(table_id.table_name); + drop_query->kind = ASTDropQuery::Kind::Drop; + drop_query->if_exists = true; + drop_query->sync = false; + + InterpreterDropQuery(drop_query, refresh_context).execute(); + } + catch (...) + { + tryLogCurrentException(&Poco::Logger::get("StorageMaterializedView"), "Failed to drop temporary table after refresh"); + } +} + void StorageMaterializedView::alter( const AlterCommands & params, ContextPtr local_context, @@ -531,25 +583,11 @@ void StorageMaterializedView::renameInMemory(const StorageID & new_table_id) { auto new_target_table_name = generateInnerTableName(new_table_id); - ASTRenameQuery::Elements rename_elements; assert(inner_table_id.database_name == old_table_id.database_name); - ASTRenameQuery::Element elem - { - ASTRenameQuery::Table - { - inner_table_id.database_name.empty() ? nullptr : std::make_shared(inner_table_id.database_name), - std::make_shared(inner_table_id.table_name) - }, - ASTRenameQuery::Table - { - new_table_id.database_name.empty() ? nullptr : std::make_shared(new_table_id.database_name), - std::make_shared(new_target_table_name) - } - }; - rename_elements.emplace_back(std::move(elem)); + auto rename = std::make_shared(); + rename->addElement(inner_table_id.database_name, inner_table_id.table_name, new_table_id.database_name, new_target_table_name); - auto rename = std::make_shared(std::move(rename_elements)); InterpreterRenameQuery(rename, getContext()).execute(); updateTargetTableId(new_table_id.database_name, new_target_table_name); } @@ -577,7 +615,7 @@ void StorageMaterializedView::startup() if (refresher) { - refresher->initializeAndStart(std::static_pointer_cast(shared_from_this())); + refresher->initializeAndStart(); if (refresh_on_start) refresher->run(); diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h index 5ecd2ec3819..a09ee07b3f6 100644 --- a/src/Storages/StorageMaterializedView.h +++ b/src/Storages/StorageMaterializedView.h @@ -5,7 +5,7 @@ #include #include -#include +#include namespace DB { @@ -106,7 +106,7 @@ private: /// Will be initialized in constructor StorageID target_table_id = StorageID::createEmpty(); - RefreshTaskHolder refresher; + OwnedRefreshTask refresher; bool refresh_on_start = false; bool has_inner_table = false; @@ -119,10 +119,14 @@ private: void checkStatementCanBeForwarded() const; - /// Prepare to refresh a refreshable materialized view: create query context, create temporary - /// table, form the insert-select query. - std::tuple> prepareRefresh() const; + ContextMutablePtr createRefreshContext() const; + /// Prepare to refresh a refreshable materialized view: create temporary table and form the + /// insert-select query. + /// out_temp_table_id may be assigned before throwing an exception, in which case the caller + /// must drop the temp table before rethrowing. + std::shared_ptr prepareRefresh(bool append, ContextMutablePtr refresh_context, std::optional & out_temp_table_id) const; StorageID exchangeTargetTable(StorageID fresh_table, ContextPtr refresh_context); + void dropTempTable(StorageID table, ContextMutablePtr refresh_context); void setTargetTableId(StorageID id); void updateTargetTableId(std::optional database_name, std::optional table_name); diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index e88844e2d31..0827321e296 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -642,10 +642,6 @@ std::vector ReadFromMerge::createChildrenPlans(SelectQ column_names_as_aliases.push_back(ExpressionActions::getSmallestColumn(storage_metadata_snapshot->getColumns().getAllPhysical()).name); } } - else - { - - } auto child = createPlanForTable( nested_storage_snaphsot, @@ -657,6 +653,7 @@ std::vector ReadFromMerge::createChildrenPlans(SelectQ row_policy_data_opt, modified_context, current_streams); + child.plan.addInterpreterContext(modified_context); if (child.plan.isInitialized()) @@ -914,12 +911,14 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextMutablePtr & mo modified_query_info.table_expression = replacement_table_expression; modified_query_info.planner_context->getOrCreateTableExpressionData(replacement_table_expression); - auto get_column_options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects().withVirtuals(); - if (storage_snapshot_->storage.supportsSubcolumns()) - get_column_options.withSubcolumns(); + auto get_column_options = GetColumnsOptions(GetColumnsOptions::All) + .withExtendedObjects() + .withSubcolumns(storage_snapshot_->storage.supportsSubcolumns()); std::unordered_map column_name_to_node; + /// Consider only non-virtual columns of storage while checking for _table and _database columns. + /// I.e. always override virtual columns with these names from underlying table (if any). if (!storage_snapshot_->tryGetColumn(get_column_options, "_table")) { auto table_name_node = std::make_shared(current_storage_id.table_name); @@ -946,6 +945,7 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextMutablePtr & mo column_name_to_node.emplace("_database", function_node); } + get_column_options.withVirtuals(); auto storage_columns = storage_snapshot_->metadata->getColumns(); bool with_aliases = /* common_processed_stage == QueryProcessingStage::FetchColumns && */ !storage_columns.getAliases().empty(); diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 78dbb72c199..f7701a2aab8 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -895,7 +895,7 @@ void StorageMergeTree::loadDeduplicationLog() std::string path = fs::path(relative_data_path) / "deduplication_logs"; /// If either there is already a deduplication log, or we will be able to use it. - if (disk->exists(path) || !disk->isReadOnly()) + if (!disk->isReadOnly() || disk->exists(path)) { deduplication_log = std::make_unique(path, settings->non_replicated_deduplication_window, format_version, disk); deduplication_log->load(); diff --git a/src/Storages/StoragePostgreSQL.cpp b/src/Storages/StoragePostgreSQL.cpp index d3214e7ed13..e0a4af68824 100644 --- a/src/Storages/StoragePostgreSQL.cpp +++ b/src/Storages/StoragePostgreSQL.cpp @@ -294,7 +294,7 @@ public: { const auto * array_type = typeid_cast(data_type.get()); const auto & nested = array_type->getNestedType(); - const auto & array = array_field.get(); + const auto & array = array_field.safeGet(); if (!isArray(nested)) { @@ -312,7 +312,7 @@ public: if (!isArray(nested_array_type->getNestedType())) { - parseArrayContent(iter->get(), nested, ostr); + parseArrayContent(iter->safeGet(), nested, ostr); } else { diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index a3c1ab7cdff..ff8e362aa36 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -800,7 +800,8 @@ void StorageReplicatedMergeTree::createNewZooKeeperNodes() { auto res = future.get(); if (res.error != Coordination::Error::ZOK && res.error != Coordination::Error::ZNODEEXISTS) - throw Coordination::Exception(res.error, "Failed to create new nodes {} at {}", res.path_created, zookeeper_path); + throw Coordination::Exception(res.error, "Failed to create new nodes {} at {} with error {}", + res.path_created, zookeeper_path, Coordination::errorMessage(res.error)); } } @@ -5193,17 +5194,16 @@ void StorageReplicatedMergeTree::startupImpl(bool from_attach_thread) startBeingLeader(); - /// Activate replica in a separate thread if we are not calling from attach thread - restarting_thread.start(/*schedule=*/!from_attach_thread); - if (from_attach_thread) { LOG_TRACE(log, "Trying to startup table from right now"); - /// Try activating replica in current thread. + /// Try activating replica in the current thread. restarting_thread.run(); + restarting_thread.start(false); } else { + restarting_thread.start(true); /// Wait while restarting_thread finishing initialization. /// NOTE It does not mean that replication is actually started after receiving this event. /// It only means that an attempt to startup replication was made. @@ -5224,7 +5224,7 @@ void StorageReplicatedMergeTree::startupImpl(bool from_attach_thread) session_expired_callback_handler = EventNotifier::instance().subscribe(Coordination::Error::ZSESSIONEXPIRED, [this]() { LOG_TEST(log, "Received event for expired session. Waking up restarting thread"); - restarting_thread.start(); + restarting_thread.start(true); }); startBackgroundMovesIfNeeded(); @@ -5293,7 +5293,6 @@ void StorageReplicatedMergeTree::flushAndPrepareForShutdown() LOG_TRACE(log, "The attach thread is shutdown"); } - restarting_thread.shutdown(/* part_of_full_shutdown */true); /// Explicitly set the event, because the restarting thread will not set it again startup_event.set(); @@ -5704,7 +5703,8 @@ std::optional StorageReplicatedMergeTree::distributedWriteFromClu { auto connection = std::make_shared( node.host_name, node.port, query_context->getGlobalContext()->getCurrentDatabase(), - node.user, node.password, SSHKey(), /*jwt*/"", node.quota_key, node.cluster, node.cluster_secret, + node.user, node.password, node.proto_send_chunked, node.proto_recv_chunked, + SSHKey(), /*jwt*/"", node.quota_key, node.cluster, node.cluster_secret, "ParallelInsertSelectInititiator", node.compression, node.secure diff --git a/src/Storages/StorageTimeSeries.cpp b/src/Storages/StorageTimeSeries.cpp new file mode 100644 index 00000000000..3ff57aaf3e5 --- /dev/null +++ b/src/Storages/StorageTimeSeries.cpp @@ -0,0 +1,477 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int INCORRECT_QUERY; + extern const int LOGICAL_ERROR; + extern const int NOT_IMPLEMENTED; + extern const int SUPPORT_IS_DISABLED; + extern const int UNEXPECTED_TABLE_ENGINE; +} + + +namespace +{ + namespace fs = std::filesystem; + + /// Loads TimeSeries storage settings from a create query. + std::shared_ptr getTimeSeriesSettingsFromQuery(const ASTCreateQuery & query) + { + auto storage_settings = std::make_shared(); + if (query.storage) + storage_settings->loadFromQuery(*query.storage); + return storage_settings; + } + + /// Creates an inner target table or just makes its storage ID. + /// This function is used by the constructor of StorageTimeSeries to find (or create) its target tables. + StorageID initTarget( + ViewTarget::Kind kind, + const ViewTarget * target_info, + const ContextPtr & context, + const StorageID & time_series_storage_id, + const ColumnsDescription & time_series_columns, + const TimeSeriesSettings & time_series_settings, + LoadingStrictnessLevel mode) + { + StorageID target_table_id = StorageID::createEmpty(); + + bool is_external_target = target_info && !target_info->table_id.empty(); + if (is_external_target) + { + /// A target table is specified. + target_table_id = target_info->table_id; + + if (mode < LoadingStrictnessLevel::ATTACH) + { + /// If it's not an ATTACH request then + /// check that the specified target table has all the required columns. + auto target_table = DatabaseCatalog::instance().getTable(target_table_id, context); + auto target_metadata = target_table->getInMemoryMetadataPtr(); + const auto & target_columns = target_metadata->columns; + TimeSeriesColumnsValidator validator{time_series_storage_id, time_series_settings}; + validator.validateTargetColumns(kind, target_table_id, target_columns); + } + } + else + { + TimeSeriesInnerTablesCreator inner_tables_creator{context, time_series_storage_id, time_series_columns, time_series_settings}; + auto inner_uuid = target_info ? target_info->inner_uuid : UUIDHelpers::Nil; + + /// An inner target table should be used. + if (mode >= LoadingStrictnessLevel::ATTACH) + { + /// If it's an ATTACH request, then the inner target table must be already created. + target_table_id = inner_tables_creator.getInnerTableID(kind, inner_uuid); + } + else + { + /// Create the inner target table. + auto inner_table_engine = target_info ? target_info->inner_engine : nullptr; + target_table_id = inner_tables_creator.createInnerTable(kind, inner_uuid, inner_table_engine); + } + } + + return target_table_id; + } +} + + +void StorageTimeSeries::normalizeTableDefinition(ASTCreateQuery & create_query, const ContextPtr & local_context) +{ + StorageID time_series_storage_id{create_query.getDatabase(), create_query.getTable()}; + TimeSeriesSettings time_series_settings; + if (create_query.storage) + time_series_settings.loadFromQuery(*create_query.storage); + std::shared_ptr as_create_query; + if (!create_query.as_table.empty()) + { + auto as_database = local_context->resolveDatabase(create_query.as_database); + as_create_query = typeid_cast>( + DatabaseCatalog::instance().getDatabase(as_database)->getCreateTableQuery(create_query.as_table, local_context)); + } + TimeSeriesDefinitionNormalizer normalizer{time_series_storage_id, time_series_settings, as_create_query.get()}; + normalizer.normalize(create_query); +} + + +StorageTimeSeries::StorageTimeSeries( + const StorageID & table_id, + const ContextPtr & local_context, + LoadingStrictnessLevel mode, + const ASTCreateQuery & query, + const ColumnsDescription & columns, + const String & comment) + : IStorage(table_id) + , WithContext(local_context->getGlobalContext()) +{ + if (mode <= LoadingStrictnessLevel::CREATE && !local_context->getSettingsRef().allow_experimental_time_series_table) + { + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, + "Experimental TimeSeries table engine " + "is not enabled (the setting 'allow_experimental_time_series_table')"); + } + + storage_settings = getTimeSeriesSettingsFromQuery(query); + + if (mode < LoadingStrictnessLevel::ATTACH) + { + TimeSeriesColumnsValidator validator{table_id, *storage_settings}; + validator.validateColumns(columns); + } + + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(columns); + if (!comment.empty()) + storage_metadata.setComment(comment); + setInMemoryMetadata(storage_metadata); + + has_inner_tables = false; + + for (auto target_kind : {ViewTarget::Data, ViewTarget::Tags, ViewTarget::Metrics}) + { + const ViewTarget * target_info = query.targets ? query.targets->tryGetTarget(target_kind) : nullptr; + auto & target = targets.emplace_back(); + target.kind = target_kind; + target.table_id = initTarget(target_kind, target_info, local_context, getStorageID(), columns, *storage_settings, mode); + target.is_inner_table = target_info && target_info->table_id.empty(); + + if (target_kind == ViewTarget::Metrics && !target.is_inner_table) + { + auto table = DatabaseCatalog::instance().tryGetTable(target.table_id, getContext()); + auto metadata = table->getInMemoryMetadataPtr(); + + for (const auto & column : metadata->columns) + if (column.type->lowCardinality()) + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "External metrics table cannot have LowCardnality columns for now."); + } + + has_inner_tables |= target.is_inner_table; + } +} + + +StorageTimeSeries::~StorageTimeSeries() = default; + + +TimeSeriesSettings StorageTimeSeries::getStorageSettings() const +{ + return *getStorageSettingsPtr(); +} + +void StorageTimeSeries::startup() +{ +} + +void StorageTimeSeries::shutdown(bool) +{ +} + + +void StorageTimeSeries::drop() +{ + /// Sync flag and the setting make sense for Atomic databases only. + /// However, with Atomic databases, IStorage::drop() can be called only from a background task in DatabaseCatalog. + /// Running synchronous DROP from that task leads to deadlock. + dropInnerTableIfAny(/* sync= */ false, getContext()); +} + +void StorageTimeSeries::dropInnerTableIfAny(bool sync, ContextPtr local_context) +{ + if (!has_inner_tables) + return; + + for (const auto & target : targets) + { + if (target.is_inner_table && DatabaseCatalog::instance().tryGetTable(target.table_id, getContext())) + { + /// Best-effort to make them work: the inner table name is almost always less than the TimeSeries name (so it's safe to lock DDLGuard). + /// (See the comment in StorageMaterializedView::dropInnerTableIfAny.) + bool may_lock_ddl_guard = getStorageID().getQualifiedName() < target.table_id.getQualifiedName(); + InterpreterDropQuery::executeDropQuery(ASTDropQuery::Kind::Drop, getContext(), local_context, target.table_id, + sync, /* ignore_sync_setting= */ true, may_lock_ddl_guard); + } + } +} + +void StorageTimeSeries::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr local_context, TableExclusiveLockHolder &) +{ + if (!has_inner_tables) + return; + + for (const auto & target : targets) + { + /// We truncate only inner tables here. + if (target.is_inner_table) + InterpreterDropQuery::executeDropQuery(ASTDropQuery::Kind::Truncate, getContext(), local_context, target.table_id, /* sync= */ true); + } +} + + +StorageID StorageTimeSeries::getTargetTableId(ViewTarget::Kind target_kind) const +{ + for (const auto & target : targets) + { + if (target.kind == target_kind) + return target.table_id; + } + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected target kind {}", toString(target_kind)); +} + +StoragePtr StorageTimeSeries::getTargetTable(ViewTarget::Kind target_kind, const ContextPtr & local_context) const +{ + return DatabaseCatalog::instance().getTable(getTargetTableId(target_kind), local_context); +} + +StoragePtr StorageTimeSeries::tryGetTargetTable(ViewTarget::Kind target_kind, const ContextPtr & local_context) const +{ + return DatabaseCatalog::instance().tryGetTable(getTargetTableId(target_kind), local_context); +} + + +std::optional StorageTimeSeries::totalRows(const Settings & settings) const +{ + UInt64 total_rows = 0; + if (has_inner_tables) + { + for (const auto & target : targets) + { + if (target.is_inner_table) + { + auto inner_table = DatabaseCatalog::instance().tryGetTable(target.table_id, getContext()); + if (!inner_table) + return std::nullopt; + + auto total_rows_in_inner_table = inner_table->totalRows(settings); + if (!total_rows_in_inner_table) + return std::nullopt; + + total_rows += *total_rows_in_inner_table; + } + } + } + return total_rows; +} + +std::optional StorageTimeSeries::totalBytes(const Settings & settings) const +{ + UInt64 total_bytes = 0; + if (has_inner_tables) + { + for (const auto & target : targets) + { + if (target.is_inner_table) + { + auto inner_table = DatabaseCatalog::instance().tryGetTable(target.table_id, getContext()); + if (!inner_table) + return std::nullopt; + + auto total_bytes_in_inner_table = inner_table->totalBytes(settings); + if (!total_bytes_in_inner_table) + return std::nullopt; + + total_bytes += *total_bytes_in_inner_table; + } + } + } + return total_bytes; +} + +std::optional StorageTimeSeries::totalBytesUncompressed(const Settings & settings) const +{ + UInt64 total_bytes = 0; + if (has_inner_tables) + { + for (const auto & target : targets) + { + if (target.is_inner_table) + { + auto inner_table = DatabaseCatalog::instance().tryGetTable(target.table_id, getContext()); + if (!inner_table) + return std::nullopt; + + auto total_bytes_in_inner_table = inner_table->totalBytesUncompressed(settings); + if (!total_bytes_in_inner_table) + return std::nullopt; + + total_bytes += *total_bytes_in_inner_table; + } + } + } + return total_bytes; +} + +Strings StorageTimeSeries::getDataPaths() const +{ + Strings data_paths; + for (const auto & target : targets) + { + auto table = DatabaseCatalog::instance().tryGetTable(target.table_id, getContext()); + if (!table) + continue; + + insertAtEnd(data_paths, table->getDataPaths()); + } + return data_paths; +} + + +bool StorageTimeSeries::optimize( + const ASTPtr & query, + const StorageMetadataPtr &, + const ASTPtr & partition, + bool final, + bool deduplicate, + const Names & deduplicate_by_columns, + bool cleanup, + ContextPtr local_context) +{ + if (!has_inner_tables) + { + throw Exception(ErrorCodes::INCORRECT_QUERY, "TimeSeries table {} targets only existing tables. Execute the statement directly on it.", + getStorageID().getNameForLogs()); + } + + bool optimized = false; + for (const auto & target : targets) + { + if (target.is_inner_table) + { + auto inner_table = DatabaseCatalog::instance().getTable(target.table_id, local_context); + optimized |= inner_table->optimize(query, inner_table->getInMemoryMetadataPtr(), partition, final, deduplicate, deduplicate_by_columns, cleanup, local_context); + } + } + + return optimized; +} + + +void StorageTimeSeries::checkAlterIsPossible(const AlterCommands & commands, ContextPtr) const +{ + for (const auto & command : commands) + { + if (!command.isCommentAlter() && command.type != AlterCommand::MODIFY_SQL_SECURITY) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Alter of type '{}' is not supported by storage {}", command.type, getName()); + } +} + +void StorageTimeSeries::alter(const AlterCommands & params, ContextPtr local_context, AlterLockHolder & table_lock_holder) +{ + IStorage::alter(params, local_context, table_lock_holder); +} + + +void StorageTimeSeries::renameInMemory(const StorageID & /* new_table_id */) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Renaming is not supported by storage {} yet", getName()); +} + + +void StorageTimeSeries::backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional &) +{ + for (const auto & target : targets) + { + /// We backup the target table's data only if it's inner. + if (target.is_inner_table) + { + auto table = DatabaseCatalog::instance().getTable(target.table_id, getContext()); + table->backupData(backup_entries_collector, fs::path{data_path_in_backup} / toString(target.kind), {}); + } + } +} + +void StorageTimeSeries::restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional &) +{ + for (const auto & target : targets) + { + /// We backup the target table's data only if it's inner. + if (target.is_inner_table) + { + auto table = DatabaseCatalog::instance().getTable(target.table_id, getContext()); + table->restoreDataFromBackup(restorer, fs::path{data_path_in_backup} / toString(target.kind), {}); + } + } +} + + +void StorageTimeSeries::read( + QueryPlan & /* query_plan */, + const Names & /* column_names */, + const StorageSnapshotPtr & /* storage_snapshot */, + SelectQueryInfo & /* query_info */, + ContextPtr /* local_context */, + QueryProcessingStage::Enum /* processed_stage */, + size_t /* max_block_size */, + size_t /* num_streams */) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "SELECT is not supported by storage {} yet", getName()); +} + + +SinkToStoragePtr StorageTimeSeries::write( + const ASTPtr & /* query */, const StorageMetadataPtr & /* metadata_snapshot */, ContextPtr /* local_context */, bool /* async_insert */) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "INSERT is not supported by storage {} yet", getName()); +} + + +std::shared_ptr storagePtrToTimeSeries(StoragePtr storage) +{ + if (auto res = typeid_cast>(storage)) + return res; + + throw Exception( + ErrorCodes::UNEXPECTED_TABLE_ENGINE, + "This operation can be executed on a TimeSeries table only, the engine of table {} is not TimeSeries", + storage->getStorageID().getNameForLogs()); +} + +std::shared_ptr storagePtrToTimeSeries(ConstStoragePtr storage) +{ + if (auto res = typeid_cast>(storage)) + return res; + + throw Exception( + ErrorCodes::UNEXPECTED_TABLE_ENGINE, + "This operation can be executed on a TimeSeries table only, the engine of table {} is not TimeSeries", + storage->getStorageID().getNameForLogs()); +} + + +void registerStorageTimeSeries(StorageFactory & factory) +{ + factory.registerStorage("TimeSeries", [](const StorageFactory::Arguments & args) + { + /// Pass local_context here to convey setting to inner tables. + return std::make_shared( + args.table_id, args.getLocalContext(), args.mode, args.query, args.columns, args.comment); + } + , + { + .supports_settings = true, + .supports_schema_inference = true, + }); +} + +} diff --git a/src/Storages/StorageTimeSeries.h b/src/Storages/StorageTimeSeries.h new file mode 100644 index 00000000000..35db3131a0b --- /dev/null +++ b/src/Storages/StorageTimeSeries.h @@ -0,0 +1,111 @@ +#pragma once + +#include +#include +#include +#include + + +namespace DB +{ +struct TimeSeriesSettings; +using TimeSeriesSettingsPtr = std::shared_ptr; + +/// Represents a table engine to keep time series received by Prometheus protocols. +/// Examples of using this table engine: +/// +/// CREATE TABLE ts ENGINE = TimeSeries() +/// -OR- +/// CREATE TABLE ts ENGINE = TimeSeries() DATA [db].table1 TAGS [db].table2 METRICS [db].table3 +/// -OR- +/// CREATE TABLE ts ENGINE = TimeSeries() DATA ENGINE = MergeTree TAGS ENGINE = ReplacingMergeTree METRICS ENGINE = ReplacingMergeTree +/// -OR- +/// CREATE TABLE ts ( +/// id UUID DEFAULT reinterpretAsUUID(sipHash128(metric_name, all_tags)) CODEC(ZSTD(3)), +/// instance LowCardinality(String), +/// job String +/// ) ENGINE = TimeSeries() +/// SETTINGS tags_to_columns = {'instance': 'instance', 'job': 'job'} +/// DATA ENGINE = ReplicatedMergeTree('zkpath', 'replica'), ... +/// +class StorageTimeSeries final : public IStorage, WithContext +{ +public: + /// Adds missing columns and reorder columns, and also adds inner table engines if they aren't specified. + static void normalizeTableDefinition(ASTCreateQuery & create_query, const ContextPtr & local_context); + + StorageTimeSeries(const StorageID & table_id, const ContextPtr & local_context, LoadingStrictnessLevel mode, + const ASTCreateQuery & query, const ColumnsDescription & columns, const String & comment); + + ~StorageTimeSeries() override; + + std::string getName() const override { return "TimeSeries"; } + + TimeSeriesSettings getStorageSettings() const; + TimeSeriesSettingsPtr getStorageSettingsPtr() const { return storage_settings; } + + StorageID getTargetTableId(ViewTarget::Kind target_kind) const; + StoragePtr getTargetTable(ViewTarget::Kind target_kind, const ContextPtr & local_context) const; + StoragePtr tryGetTargetTable(ViewTarget::Kind target_kind, const ContextPtr & local_context) const; + + void startup() override; + void shutdown(bool is_drop) override; + + void read( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + size_t num_streams) override; + + SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, bool async_insert) override; + + bool optimize( + const ASTPtr & query, + const StorageMetadataPtr & metadata_snapshot, + const ASTPtr & partition, + bool final, + bool deduplicate, + const Names & deduplicate_by_columns, + bool cleanup, + ContextPtr local_context) override; + + void drop() override; + void dropInnerTableIfAny(bool sync, ContextPtr local_context) override; + + void truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr, TableExclusiveLockHolder &) override; + + void renameInMemory(const StorageID & new_table_id) override; + + void checkAlterIsPossible(const AlterCommands & commands, ContextPtr local_context) const override; + void alter(const AlterCommands & params, ContextPtr local_context, AlterLockHolder & table_lock_holder) override; + + void backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & partitions) override; + void restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions) override; + + std::optional totalRows(const Settings & settings) const override; + std::optional totalBytes(const Settings & settings) const override; + std::optional totalBytesUncompressed(const Settings & settings) const override; + Strings getDataPaths() const override; + +private: + TimeSeriesSettingsPtr storage_settings; + + struct Target + { + ViewTarget::Kind kind; + StorageID table_id = StorageID::createEmpty(); + bool is_inner_table; + }; + + std::vector targets; + bool has_inner_tables; +}; + +std::shared_ptr storagePtrToTimeSeries(StoragePtr storage); +std::shared_ptr storagePtrToTimeSeries(ConstStoragePtr storage); + +} diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 76c49c50f76..fc1354b780a 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -36,6 +36,8 @@ #include #include #include + +#include #include #include #include @@ -90,11 +92,22 @@ static const std::vector> optional_regex_keys = { std::make_shared(R"(headers.header\[[0-9]*\].value)"), }; -static bool urlWithGlobs(const String & uri) +bool urlWithGlobs(const String & uri) { return (uri.find('{') != std::string::npos && uri.find('}') != std::string::npos) || uri.find('|') != std::string::npos; } +String getSampleURI(String uri, ContextPtr context) +{ + if (urlWithGlobs(uri)) + { + auto uris = parseRemoteDescription(uri, 0, uri.size(), ',', context->getSettingsRef().glob_expansion_max_elements); + if (!uris.empty()) + return uris[0]; + } + return uri; +} + static ConnectionTimeouts getHTTPTimeouts(ContextPtr context) { return ConnectionTimeouts::getHTTPTimeouts(context->getSettingsRef(), context->getServerSettings().keep_alive_timeout); @@ -152,8 +165,9 @@ IStorageURLBase::IStorageURLBase( storage_metadata.setConstraints(constraints_); storage_metadata.setComment(comment); + + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.columns, context_, getSampleURI(uri, context_), format_settings)); setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); } @@ -414,13 +428,14 @@ Chunk StorageURLSource::generate() size_t chunk_size = 0; if (input_format) chunk_size = input_format->getApproxBytesReadForChunk(); + progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); VirtualColumnUtils::addRequestedFileLikeStorageVirtualsToChunk( chunk, requested_virtual_columns, { .path = curr_uri.getPath(), - .size = current_file_size - }); + .size = current_file_size, + }, getContext()); return chunk; } @@ -839,7 +854,7 @@ namespace format = format_name; } - String getLastFileName() const override { return current_url_option; } + String getLastFilePath() const override { return current_url_option; } bool supportsLastReadBufferRecreation() const override { return true; } @@ -1160,6 +1175,7 @@ void ReadFromURL::createIterator(const ActionsDAG::Node * predicate) void ReadFromURL::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { createIterator(nullptr); + const auto & settings = context->getSettingsRef(); if (is_empty_glob) { @@ -1170,7 +1186,6 @@ void ReadFromURL::initializePipeline(QueryPipelineBuilder & pipeline, const Buil Pipes pipes; pipes.reserve(num_streams); - const auto & settings = context->getSettingsRef(); const size_t max_parsing_threads = num_streams >= settings.max_parsing_threads ? 1 : (settings.max_parsing_threads / num_streams); for (size_t i = 0; i < num_streams; ++i) diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 9d060e9e11c..19daf843431 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -141,6 +141,9 @@ private: virtual Block getHeaderBlock(const Names & column_names, const StorageSnapshotPtr & storage_snapshot) const = 0; }; +bool urlWithGlobs(const String & uri); + +String getSampleURI(String uri, ContextPtr context); class StorageURLSource : public SourceWithKeyCondition, WithContext { diff --git a/src/Storages/StorageURLCluster.cpp b/src/Storages/StorageURLCluster.cpp index 69041482fd8..140413d78b0 100644 --- a/src/Storages/StorageURLCluster.cpp +++ b/src/Storages/StorageURLCluster.cpp @@ -75,8 +75,8 @@ StorageURLCluster::StorageURLCluster( } storage_metadata.setConstraints(constraints_); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.columns, context, getSampleURI(uri, context))); setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); } void StorageURLCluster::updateQueryToSendIfNeeded(ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) diff --git a/src/Storages/System/StorageSystemBuildOptions.cpp.in b/src/Storages/System/StorageSystemBuildOptions.cpp.in index a81bcb08bfc..9e5adbfe825 100644 --- a/src/Storages/System/StorageSystemBuildOptions.cpp.in +++ b/src/Storages/System/StorageSystemBuildOptions.cpp.in @@ -21,7 +21,7 @@ const char * auto_config_build[] "BUILD_COMPILE_DEFINITIONS", "@BUILD_COMPILE_DEFINITIONS@", "USE_EMBEDDED_COMPILER", "@USE_EMBEDDED_COMPILER@", "USE_GLIBC_COMPATIBILITY", "@GLIBC_COMPATIBILITY@", - "USE_JEMALLOC", "@ENABLE_JEMALLOC@", + "USE_JEMALLOC", "@USE_JEMALLOC@", "USE_ICU", "@USE_ICU@", "USE_H3", "@USE_H3@", "USE_MYSQL", "@USE_MYSQL@", @@ -36,7 +36,7 @@ const char * auto_config_build[] "USE_SSL", "@USE_SSL@", "OPENSSL_VERSION", "@OPENSSL_VERSION@", "OPENSSL_IS_BORING_SSL", "@OPENSSL_IS_BORING_SSL@", - "USE_VECTORSCAN", "@ENABLE_VECTORSCAN@", + "USE_VECTORSCAN", "@USE_VECTORSCAN@", "USE_SIMDJSON", "@USE_SIMDJSON@", "USE_ODBC", "@USE_ODBC@", "USE_GRPC", "@USE_GRPC@", @@ -62,8 +62,8 @@ const char * auto_config_build[] "USE_ARROW", "@USE_ARROW@", "USE_ORC", "@USE_ORC@", "USE_MSGPACK", "@USE_MSGPACK@", - "USE_QPL", "@ENABLE_QPL@", - "USE_QAT", "@ENABLE_QATLIB@", + "USE_QPL", "@USE_QPL@", + "USE_QATLIB", "@USE_QATLIB@", "GIT_HASH", "@GIT_HASH@", "GIT_BRANCH", R"IRjaNsZIL9Yh7FQ4(@GIT_BRANCH@)IRjaNsZIL9Yh7FQ4", "GIT_DATE", "@GIT_DATE@", diff --git a/src/Storages/System/StorageSystemClusters.cpp b/src/Storages/System/StorageSystemClusters.cpp index 9c5c07ae49f..9493d2c97ab 100644 --- a/src/Storages/System/StorageSystemClusters.cpp +++ b/src/Storages/System/StorageSystemClusters.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -31,6 +32,8 @@ ColumnsDescription StorageSystemClusters::getColumnsDescription() {"database_shard_name", std::make_shared(), "The name of the `Replicated` database shard (for clusters that belong to a `Replicated` database)."}, {"database_replica_name", std::make_shared(), "The name of the `Replicated` database replica (for clusters that belong to a `Replicated` database)."}, {"is_active", std::make_shared(std::make_shared()), "The status of the Replicated database replica (for clusters that belong to a Replicated database): 1 means 'replica is online', 0 means 'replica is offline', NULL means 'unknown'."}, + {"replication_lag", std::make_shared(std::make_shared()), "The replication lag of the `Replicated` database replica (for clusters that belong to a Replicated database)."}, + {"recovery_time", std::make_shared(std::make_shared()), "The recovery time of the `Replicated` database replica (for clusters that belong to a Replicated database), in milliseconds."}, }; description.setAliases({ @@ -67,6 +70,11 @@ void StorageSystemClusters::writeCluster(MutableColumns & res_columns, const std const auto & shards_info = cluster->getShardsInfo(); const auto & addresses_with_failover = cluster->getShardsAddresses(); + size_t recovery_time_column_idx = columns_mask.size() - 1, replication_lag_column_idx = columns_mask.size() - 2, is_active_column_idx = columns_mask.size() - 3; + ReplicasInfo replicas_info; + if (replicated && (columns_mask[recovery_time_column_idx] || columns_mask[replication_lag_column_idx] || columns_mask[is_active_column_idx])) + replicas_info = replicated->tryGetReplicasInfo(name_and_cluster.second); + size_t replica_idx = 0; for (size_t shard_index = 0; shard_index < shards_info.size(); ++shard_index) { @@ -114,17 +122,46 @@ void StorageSystemClusters::writeCluster(MutableColumns & res_columns, const std res_columns[res_index++]->insert(address.database_shard_name); if (columns_mask[src_index++]) res_columns[res_index++]->insert(address.database_replica_name); + + /// make sure these three columns remain the last ones if (columns_mask[src_index++]) { - std::vector is_active; - if (replicated) - is_active = replicated->tryGetAreReplicasActive(name_and_cluster.second); - - if (is_active.empty()) + if (replicas_info.empty()) res_columns[res_index++]->insertDefault(); else - res_columns[res_index++]->insert(is_active[replica_idx++]); + { + const auto & replica_info = replicas_info[replica_idx]; + res_columns[res_index++]->insert(replica_info.is_active); + } } + if (columns_mask[src_index++]) + { + if (replicas_info.empty()) + res_columns[res_index++]->insertDefault(); + else + { + const auto & replica_info = replicas_info[replica_idx]; + if (replica_info.replication_lag != std::nullopt) + res_columns[res_index++]->insert(*replica_info.replication_lag); + else + res_columns[res_index++]->insertDefault(); + } + } + if (columns_mask[src_index++]) + { + if (replicas_info.empty()) + res_columns[res_index++]->insertDefault(); + else + { + const auto & replica_info = replicas_info[replica_idx]; + if (replica_info.recovery_time != 0) + res_columns[res_index++]->insert(replica_info.recovery_time); + else + res_columns[res_index++]->insertDefault(); + } + } + + ++replica_idx; } } } diff --git a/src/Storages/System/StorageSystemClusters.h b/src/Storages/System/StorageSystemClusters.h index f6adb902f43..a5f6d551ca1 100644 --- a/src/Storages/System/StorageSystemClusters.h +++ b/src/Storages/System/StorageSystemClusters.h @@ -1,10 +1,10 @@ #pragma once +#include #include #include #include - namespace DB { diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index 03c569303c5..bc13cb77d5e 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -105,8 +105,8 @@ protected: while (rows_count < max_block_size && db_table_num < total_tables) { - const std::string database_name = (*databases)[db_table_num].get(); - const std::string table_name = (*tables)[db_table_num].get(); + const std::string database_name = (*databases)[db_table_num].safeGet(); + const std::string table_name = (*tables)[db_table_num].safeGet(); ++db_table_num; ColumnsDescription columns; @@ -437,7 +437,7 @@ void ReadFromSystemColumns::initializePipeline(QueryPipelineBuilder & pipeline, for (size_t i = 0; i < num_databases; ++i) { - const std::string database_name = (*database_column)[i].get(); + const std::string database_name = (*database_column)[i].safeGet(); if (database_name.empty()) { for (auto & [table_name, table] : external_tables) diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index 35b9c0008c6..67dfe3bfe86 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -457,6 +457,7 @@ const char * auto_contributors[] { "Gleb-Tretyakov", "GoGoWen2021", "Gosha Letov", + "Graham Campbell", "Gregory", "Grigorii Sokolik", "Grigory", @@ -472,6 +473,7 @@ const char * auto_contributors[] { "Habibullah Oladepo", "HaiBo Li", "Hakob Saghatelyan", + "Halersson Paris", "Hamoon", "Han Fei", "Han Shukai", @@ -541,6 +543,7 @@ const char * auto_contributors[] { "JackyWoo", "Jacob Hayes", "Jacob Herrington", + "Jacob Reckhard", "Jai Jhala", "Jake Bamrah", "Jake Liu", @@ -661,6 +664,7 @@ const char * auto_contributors[] { "LaurieLY", "Lee sungju", "Lemore", + "Lennard Eijsackers", "Leonardo Cecchi", "Leonardo Maciel", "Leonid Krylov", @@ -922,6 +926,7 @@ const char * auto_contributors[] { "Pervakov Grigorii", "Pervakov Grigory", "Peter", + "Peter Nguyen", "Petr Vasilev", "Pham Anh Tuan", "Philip Hallstrom", @@ -981,6 +986,7 @@ const char * auto_contributors[] { "Ronald Bradford", "Rory Crispin", "Roy Bellingan", + "Ruihang Xia", "Ruslan", "Ruslan Mardugalliamov", "Ruslan Savchenko", @@ -1000,9 +1006,11 @@ const char * auto_contributors[] { "Sami Kerola", "Samuel Chou", "Samuel Colvin", + "Samuele Guerrini", "San", "Sanjam Panda", "Sariel", + "Sasha Sheikin", "Saulius Valatka", "Sean Haynes", "Sean Lafferty", @@ -1202,6 +1210,7 @@ const char * auto_contributors[] { "Vladimir Makarov", "Vladimir Mihailenco", "Vladimir Smirnov", + "Vladimir Varankin", "Vladislav Rassokhin", "Vladislav Smirnov", "Vladislav V", @@ -1275,6 +1284,7 @@ const char * auto_contributors[] { "Zhichun Wu", "Zhiguo Zhou", "Zhipeng", + "Zhukova, Maria", "Zhuo Qiu", "Zijie Lu", "Zimu Li", @@ -1502,6 +1512,7 @@ const char * auto_contributors[] { "hchen9", "hcz", "hdhoang", + "heguangnan", "heleihelei", "helifu", "hendrik-m", @@ -1572,6 +1583,7 @@ const char * auto_contributors[] { "kevinyhzou", "kgurjev", "khamadiev", + "khodyrevyurii", "kigerzhang", "kirillikoff", "kmeaw", @@ -1787,6 +1799,7 @@ const char * auto_contributors[] { "ruslandoga", "ryzuo", "s-kat", + "sakulali", "sanjam", "santaux", "santrancisco", @@ -1804,6 +1817,7 @@ const char * auto_contributors[] { "shabroo", "shangshujie", "shedx", + "shiyer7474", "shuai-xu", "shuchaome", "shuyang", @@ -1901,6 +1915,7 @@ const char * auto_contributors[] { "wzl", "xPoSx", "xbthink", + "xc0derx", "xiao", "xiaolei565", "xiebin", @@ -1964,6 +1979,7 @@ const char * auto_contributors[] { "zkun", "zlx19950903", "zombee0", + "zoomxi", "zvonand", "zvrr", "zvvr", diff --git a/src/Storages/System/StorageSystemFilesystemCache.cpp b/src/Storages/System/StorageSystemFilesystemCache.cpp index cfb388bc232..0e972d8411b 100644 --- a/src/Storages/System/StorageSystemFilesystemCache.cpp +++ b/src/Storages/System/StorageSystemFilesystemCache.cpp @@ -47,6 +47,9 @@ void StorageSystemFilesystemCache::fillData(MutableColumns & res_columns, Contex for (const auto & [cache_name, cache_data] : caches) { const auto & cache = cache_data->cache; + if (!cache->isInitialized()) + continue; + cache->iterate([&](const FileSegment::Info & file_segment) { size_t i = 0; diff --git a/src/Storages/System/StorageSystemFilesystemCacheSettings.cpp b/src/Storages/System/StorageSystemFilesystemCacheSettings.cpp index 8915032baf7..c6bba6b8598 100644 --- a/src/Storages/System/StorageSystemFilesystemCacheSettings.cpp +++ b/src/Storages/System/StorageSystemFilesystemCacheSettings.cpp @@ -21,6 +21,7 @@ ColumnsDescription StorageSystemFilesystemCacheSettings::getColumnsDescription() {"path", std::make_shared(), "Cache directory"}, {"max_size", std::make_shared(), "Cache size limit by the number of bytes"}, {"max_elements", std::make_shared(), "Cache size limit by the number of elements"}, + {"is_initialized", std::make_shared(), "Whether the cache is initialized and ready to be used"}, {"current_size", std::make_shared(), "Current cache size by the number of bytes"}, {"current_elements", std::make_shared(), "Current cache size by the number of elements"}, {"max_file_segment_size", std::make_shared(), "Maximum allowed file segment size"}, @@ -56,6 +57,7 @@ void StorageSystemFilesystemCacheSettings::fillData( res_columns[i++]->insert(settings.base_path); res_columns[i++]->insert(settings.max_size); res_columns[i++]->insert(settings.max_elements); + res_columns[i++]->insert(cache->isInitialized()); res_columns[i++]->insert(cache->getUsedCacheSize()); res_columns[i++]->insert(cache->getFileSegmentsNum()); res_columns[i++]->insert(settings.max_file_segment_size); diff --git a/src/Storages/System/StorageSystemKafkaConsumers.cpp b/src/Storages/System/StorageSystemKafkaConsumers.cpp index 86713632339..db6804d3ad7 100644 --- a/src/Storages/System/StorageSystemKafkaConsumers.cpp +++ b/src/Storages/System/StorageSystemKafkaConsumers.cpp @@ -42,7 +42,7 @@ ColumnsDescription StorageSystemKafkaConsumers::getColumnsDescription() {"num_rebalance_revocations", std::make_shared(), "Number of times the consumer was revoked its partitions."}, {"num_rebalance_assignments", std::make_shared(), "Number of times the consumer was assigned to Kafka cluster."}, {"is_currently_used", std::make_shared(), "The flag which shows whether the consumer is in use."}, - {"last_used", std::make_shared(6), "The last time this consumer was in use."}, + {"last_used", std::make_shared(), "The last time this consumer was in use, unix time in microseconds."}, {"rdkafka_stat", std::make_shared(), "Library internal statistic. Set statistics_interval_ms to 0 disable, default is 3000 (once in three seconds)."}, }; } @@ -79,7 +79,7 @@ void StorageSystemKafkaConsumers::fillData(MutableColumns & res_columns, Context auto & num_rebalance_revocations = assert_cast(*res_columns[index++]); auto & num_rebalance_assigments = assert_cast(*res_columns[index++]); auto & is_currently_used = assert_cast(*res_columns[index++]); - auto & last_used = assert_cast(*res_columns[index++]); + auto & last_used = assert_cast(*res_columns[index++]); auto & rdkafka_stat = assert_cast(*res_columns[index++]); const auto access = context->getAccess(); diff --git a/src/Storages/System/StorageSystemOne.cpp b/src/Storages/System/StorageSystemOne.cpp index 936d55e61a0..70377715dc3 100644 --- a/src/Storages/System/StorageSystemOne.cpp +++ b/src/Storages/System/StorageSystemOne.cpp @@ -41,7 +41,10 @@ Pipe StorageSystemOne::read( auto column = DataTypeUInt8().createColumnConst(1, 0u)->convertToFullColumnIfConst(); Chunk chunk({ std::move(column) }, 1); - return Pipe(std::make_shared(std::move(header), std::move(chunk))); + auto source = std::make_shared(std::move(header), std::move(chunk)); + source->addTotalRowsApprox(1); + + return Pipe(source); } diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index 7ace8ee24aa..c87bdb6d26a 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -138,7 +138,7 @@ StoragesInfoStream::StoragesInfoStream(std::optional filter_by_datab for (size_t i = 0; i < rows; ++i) { - String database_name = (*database_column_for_filter)[i].get(); + String database_name = (*database_column_for_filter)[i].safeGet(); const DatabasePtr database = databases.at(database_name); offsets[i] = i ? offsets[i - 1] : 0; diff --git a/src/Storages/System/StorageSystemPartsBase.h b/src/Storages/System/StorageSystemPartsBase.h index 806af4a7bf8..3be73aeda17 100644 --- a/src/Storages/System/StorageSystemPartsBase.h +++ b/src/Storages/System/StorageSystemPartsBase.h @@ -52,13 +52,13 @@ public: { StoragesInfo info; - info.database = (*database_column)[next_row].get(); - info.table = (*table_column)[next_row].get(); - UUID storage_uuid = (*storage_uuid_column)[next_row].get(); + info.database = (*database_column)[next_row].safeGet(); + info.table = (*table_column)[next_row].safeGet(); + UUID storage_uuid = (*storage_uuid_column)[next_row].safeGet(); auto is_same_table = [&storage_uuid, this] (size_t row) -> bool { - return (*storage_uuid_column)[row].get() == storage_uuid; + return (*storage_uuid_column)[row].safeGet() == storage_uuid; }; /// We may have two rows per table which differ in 'active' value. @@ -66,7 +66,7 @@ public: /// must collect the inactive parts. Remember this fact in StoragesInfo. for (; next_row < rows && is_same_table(next_row); ++next_row) { - const auto active = (*active_column)[next_row].get(); + const auto active = (*active_column)[next_row].safeGet(); if (active == 0) info.need_inactive_parts = true; } diff --git a/src/Storages/System/StorageSystemQueryCache.cpp b/src/Storages/System/StorageSystemQueryCache.cpp index 4c54d4ae16f..b3532ba40a7 100644 --- a/src/Storages/System/StorageSystemQueryCache.cpp +++ b/src/Storages/System/StorageSystemQueryCache.cpp @@ -1,6 +1,7 @@ #include "StorageSystemQueryCache.h" #include #include +#include #include #include #include @@ -15,6 +16,7 @@ ColumnsDescription StorageSystemQueryCache::getColumnsDescription() { {"query", std::make_shared(), "Query string."}, {"result_size", std::make_shared(), "Size of the query cache entry."}, + {"tag", std::make_shared(std::make_shared()), "Tag of the query cache entry."}, {"stale", std::make_shared(), "If the query cache entry is stale."}, {"shared", std::make_shared(), "If the query cache entry is shared between multiple users."}, {"compressed", std::make_shared(), "If the query cache entry is compressed."}, @@ -51,11 +53,12 @@ void StorageSystemQueryCache::fillData(MutableColumns & res_columns, ContextPtr res_columns[0]->insert(key.query_string); /// approximates the original query string res_columns[1]->insert(QueryCache::QueryCacheEntryWeight()(*query_result)); - res_columns[2]->insert(key.expires_at < std::chrono::system_clock::now()); - res_columns[3]->insert(key.is_shared); - res_columns[4]->insert(key.is_compressed); - res_columns[5]->insert(std::chrono::system_clock::to_time_t(key.expires_at)); - res_columns[6]->insert(key.ast_hash.low64); /// query cache considers aliases (issue #56258) + res_columns[2]->insert(key.tag); + res_columns[3]->insert(key.expires_at < std::chrono::system_clock::now()); + res_columns[4]->insert(key.is_shared); + res_columns[5]->insert(key.is_compressed); + res_columns[6]->insert(std::chrono::system_clock::to_time_t(key.expires_at)); + res_columns[7]->insert(key.ast_hash.low64); /// query cache considers aliases (issue #56258) } } diff --git a/src/Storages/System/StorageSystemViewRefreshes.cpp b/src/Storages/System/StorageSystemViewRefreshes.cpp index 30539ed6b6a..6e0dab1468d 100644 --- a/src/Storages/System/StorageSystemViewRefreshes.cpp +++ b/src/Storages/System/StorageSystemViewRefreshes.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -19,6 +20,7 @@ ColumnsDescription StorageSystemViewRefreshes::getColumnsDescription() { {"database", std::make_shared(), "The name of the database the table is in."}, {"view", std::make_shared(), "Table name."}, + {"uuid", std::make_shared(), "Table uuid (Atomic database)."}, {"status", std::make_shared(), "Current state of the refresh."}, {"last_refresh_result", std::make_shared(), "Outcome of the latest refresh attempt."}, {"last_refresh_time", std::make_shared(std::make_shared()), @@ -32,8 +34,9 @@ ColumnsDescription StorageSystemViewRefreshes::getColumnsDescription() "If status = 'WaitingForDependencies', a refresh is ready to start as soon as these dependencies are fulfilled." }, {"exception", std::make_shared(), - "if last_refresh_result = 'Exception', i.e. the last refresh attempt failed, this column contains the corresponding error message and stack trace." + "if last_refresh_result = 'Error', i.e. the last refresh attempt failed, this column contains the corresponding error message and stack trace." }, + {"retry", std::make_shared(), "How many failed attempts there were so far, for the current refresh."}, {"refresh_count", std::make_shared(), "Number of successful refreshes since last server restart or table creation."}, {"progress", std::make_shared(), "Progress of the current refresh, between 0 and 1."}, {"elapsed", std::make_shared(), "The amount of nanoseconds the current refresh took."}, @@ -63,6 +66,7 @@ void StorageSystemViewRefreshes::fillData( std::size_t i = 0; res_columns[i++]->insert(refresh.view_id.getDatabaseName()); res_columns[i++]->insert(refresh.view_id.getTableName()); + res_columns[i++]->insert(refresh.view_id.uuid); res_columns[i++]->insert(toString(refresh.state)); res_columns[i++]->insert(toString(refresh.last_refresh_result)); @@ -85,6 +89,7 @@ void StorageSystemViewRefreshes::fillData( res_columns[i++]->insert(Array(deps)); res_columns[i++]->insert(refresh.exception_message); + res_columns[i++]->insert(refresh.retry); res_columns[i++]->insert(refresh.refresh_count); res_columns[i++]->insert(Float64(refresh.progress.read_rows) / refresh.progress.total_rows_to_read); res_columns[i++]->insert(refresh.progress.elapsed_ns / 1e9); diff --git a/src/Storages/TimeSeries/PrometheusRemoteReadProtocol.cpp b/src/Storages/TimeSeries/PrometheusRemoteReadProtocol.cpp new file mode 100644 index 00000000000..d6d258f5ff6 --- /dev/null +++ b/src/Storages/TimeSeries/PrometheusRemoteReadProtocol.cpp @@ -0,0 +1,472 @@ +#include + +#include "config.h" +#if USE_PROMETHEUS_PROTOBUFS + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_REQUEST_PARAMETER; +} + + +namespace +{ + /// Makes an ASTIdentifier for a column of the specified table. + ASTPtr makeASTColumn(const StorageID & table_id, const String & column_name) + { + return std::make_shared(Strings{table_id.database_name, table_id.table_name, column_name}); + } + + /// Makes an AST for condition `data_table.timestamp >= min_timestamp_ms` + ASTPtr makeASTTimestampGreaterOrEquals(Int64 min_timestamp_ms, const StorageID & data_table_id) + { + return makeASTFunction("greaterOrEquals", + makeASTColumn(data_table_id, TimeSeriesColumnNames::Timestamp), + std::make_shared(Field{DecimalField{DateTime64{min_timestamp_ms}, 3}})); + } + + /// Makes an AST for condition `data_table.timestamp <= max_timestamp_ms` + ASTPtr makeASTTimestampLessOrEquals(Int64 max_timestamp_ms, const StorageID & data_table_id) + { + return makeASTFunction("lessOrEquals", + makeASTColumn(data_table_id, TimeSeriesColumnNames::Timestamp), + std::make_shared(Field{DecimalField{DateTime64{max_timestamp_ms}, 3}})); + } + + /// Makes an AST for condition `tags_table.max_time >= min_timestamp_ms` + ASTPtr makeASTMaxTimeGreaterOrEquals(Int64 min_timestamp_ms, const StorageID & tags_table_id) + { + return makeASTFunction("greaterOrEquals", + makeASTColumn(tags_table_id, TimeSeriesColumnNames::MaxTime), + std::make_shared(Field{DecimalField{DateTime64{min_timestamp_ms}, 3}})); + } + + /// Makes an AST for condition `tags_table.min_time <= max_timestamp_ms` + ASTPtr makeASTMinTimeLessOrEquals(Int64 max_timestamp_ms, const StorageID & tags_table_id) + { + return makeASTFunction("lessOrEquals", + makeASTColumn(tags_table_id, TimeSeriesColumnNames::MinTime), + std::make_shared(Field{DecimalField{DateTime64{max_timestamp_ms}, 3}})); + } + + /// Makes an AST for the expression referencing a tag value. + ASTPtr makeASTLabelName(const String & label_name, const StorageID & tags_table_id, const std::unordered_map & column_name_by_tag_name) + { + if (label_name == TimeSeriesTagNames::MetricName) + return makeASTColumn(tags_table_id, TimeSeriesColumnNames::MetricName); + + auto it = column_name_by_tag_name.find(label_name); + if (it != column_name_by_tag_name.end()) + return makeASTColumn(tags_table_id, it->second); + + /// arrayElement() can be used to extract a value from a Map too. + return makeASTFunction("arrayElement", makeASTColumn(tags_table_id, TimeSeriesColumnNames::Tags), std::make_shared(label_name)); + } + + /// Makes an AST for a label matcher, for example `metric_name == 'value'` or `NOT match(labels['label_name'], 'regexp')`. + ASTPtr makeASTLabelMatcher( + const prometheus::LabelMatcher & label_matcher, + const StorageID & tags_table_id, + const std::unordered_map & column_name_by_tag_name) + { + const auto & label_name = label_matcher.name(); + const auto & label_value = label_matcher.value(); + auto type = label_matcher.type(); + + if (type == prometheus::LabelMatcher::EQ) + return makeASTFunction("equals", makeASTLabelName(label_name, tags_table_id, column_name_by_tag_name), std::make_shared(label_value)); + else if (type == prometheus::LabelMatcher::NEQ) + return makeASTFunction("notEquals", makeASTLabelName(label_name, tags_table_id, column_name_by_tag_name), std::make_shared(label_value)); + else if (type == prometheus::LabelMatcher::RE) + return makeASTFunction("match", makeASTLabelName(label_name, tags_table_id, column_name_by_tag_name), std::make_shared(label_value)); + else if (type == prometheus::LabelMatcher::NRE) + return makeASTFunction("not", makeASTFunction("match", makeASTLabelName(label_name, tags_table_id, column_name_by_tag_name), std::make_shared(label_value))); + else + throw Exception(ErrorCodes::BAD_REQUEST_PARAMETER, "Unexpected type of label matcher: {}", type); + } + + /// Makes an AST checking that tags match a specified label matcher and that timestamp is in range [min_timestamp_ms, max_timestamp_ms]. + ASTPtr makeASTFilterForReadingTimeSeries( + const google::protobuf::RepeatedPtrField & label_matcher, + Int64 min_timestamp_ms, + Int64 max_timestamp_ms, + const StorageID & data_table_id, + const StorageID & tags_table_id, + const std::unordered_map & column_name_by_tag_name, + bool filter_by_min_time_and_max_time) + { + ASTs filters; + + if (min_timestamp_ms) + { + filters.push_back(makeASTTimestampGreaterOrEquals(min_timestamp_ms, data_table_id)); + if (filter_by_min_time_and_max_time) + filters.push_back(makeASTMaxTimeGreaterOrEquals(min_timestamp_ms, tags_table_id)); + } + + if (max_timestamp_ms) + { + filters.push_back(makeASTTimestampLessOrEquals(max_timestamp_ms, data_table_id)); + if (filter_by_min_time_and_max_time) + filters.push_back(makeASTMinTimeLessOrEquals(max_timestamp_ms, tags_table_id)); + } + + for (const auto & label_matcher_element : label_matcher) + filters.push_back(makeASTLabelMatcher(label_matcher_element, tags_table_id, column_name_by_tag_name)); + + if (filters.empty()) + return nullptr; + + return makeASTForLogicalAnd(std::move(filters)); + } + + /// Makes a mapping from a tag name to a column name. + std::unordered_map makeColumnNameByTagNameMap(const TimeSeriesSettings & storage_settings) + { + std::unordered_map res; + const Map & tags_to_columns = storage_settings.tags_to_columns; + for (const auto & tag_name_and_column_name : tags_to_columns) + { + const auto & tuple = tag_name_and_column_name.safeGet(); + const auto & tag_name = tuple.at(0).safeGet(); + const auto & column_name = tuple.at(1).safeGet(); + res[tag_name] = column_name; + } + return res; + } + + /// The function builds a SELECT query for reading time series: + /// SELECT tags_table.metric_name, tags_table.tag_column1, ... tags_table.tag_columnN, tags_table.tags, + /// groupArray(CAST(data_table.timestamp, 'DateTime64(3)'), CAST(data_table.value, 'Float64')) + /// FROM data_table + /// SEMI LEFT JOIN tag_table ON data_table.id = tags_table.id + /// WHERE filter + /// GROUP BY tags_table.tag_column1, ..., tags_table.tag_columnN, tags_table.tags + ASTPtr buildSelectQueryForReadingTimeSeries( + Int64 min_timestamp_ms, + Int64 max_timestamp_ms, + const google::protobuf::RepeatedPtrField & label_matcher, + const TimeSeriesSettings & time_series_settings, + const StorageID & data_table_id, + const StorageID & tags_table_id) + { + auto select_query = std::make_shared(); + + /// SELECT tags_table.metric_name, any(tags_table.tag_column1), ... any(tags_table.tag_columnN), any(tags_table.tags), + /// groupArray(data_table.timestamp, data_table.value) + { + auto exp_list = std::make_shared(); + + exp_list->children.push_back( + makeASTColumn(tags_table_id, TimeSeriesColumnNames::MetricName)); + + const Map & tags_to_columns = time_series_settings.tags_to_columns; + for (const auto & tag_name_and_column_name : tags_to_columns) + { + const auto & tuple = tag_name_and_column_name.safeGet(); + const auto & column_name = tuple.at(1).safeGet(); + exp_list->children.push_back( + makeASTColumn(tags_table_id, column_name)); + } + + exp_list->children.push_back( + makeASTColumn(tags_table_id, TimeSeriesColumnNames::Tags)); + + exp_list->children.push_back( + makeASTFunction("groupArray", + makeASTFunction("tuple", + makeASTFunction("CAST", makeASTColumn(data_table_id, TimeSeriesColumnNames::Timestamp), std::make_shared("DateTime64(3)")), + makeASTFunction("CAST", makeASTColumn(data_table_id, TimeSeriesColumnNames::Value), std::make_shared("Float64"))))); + + select_query->setExpression(ASTSelectQuery::Expression::SELECT, exp_list); + } + + /// FROM data_table + auto tables = std::make_shared(); + + { + auto table = std::make_shared(); + auto table_exp = std::make_shared(); + table_exp->database_and_table_name = std::make_shared(data_table_id); + table_exp->children.emplace_back(table_exp->database_and_table_name); + + table->table_expression = table_exp; + tables->children.push_back(table); + } + + /// SEMI LEFT JOIN tags_table ON data_table.id = tags_table.id + { + auto table = std::make_shared(); + + auto table_join = std::make_shared(); + table_join->kind = JoinKind::Left; + table_join->strictness = JoinStrictness::Semi; + + table_join->on_expression = makeASTFunction("equals", makeASTColumn(data_table_id, TimeSeriesColumnNames::ID), makeASTColumn(tags_table_id, TimeSeriesColumnNames::ID)); + table->table_join = table_join; + + auto table_exp = std::make_shared(); + table_exp->database_and_table_name = std::make_shared(tags_table_id); + table_exp->children.emplace_back(table_exp->database_and_table_name); + + table->table_expression = table_exp; + tables->children.push_back(table); + + select_query->setExpression(ASTSelectQuery::Expression::TABLES, tables); + } + + auto column_name_by_tag_name = makeColumnNameByTagNameMap(time_series_settings); + + /// WHERE + if (auto where = makeASTFilterForReadingTimeSeries(label_matcher, min_timestamp_ms, max_timestamp_ms, data_table_id, tags_table_id, + column_name_by_tag_name, time_series_settings.filter_by_min_time_and_max_time)) + { + select_query->setExpression(ASTSelectQuery::Expression::WHERE, std::move(where)); + } + + /// GROUP BY tags_table.metric_name, tags_table.tag_column1, ..., tags_table.tag_columnN, tags_table.tags + { + auto exp_list = std::make_shared(); + + exp_list->children.push_back( + makeASTColumn(tags_table_id, TimeSeriesColumnNames::MetricName)); + + const Map & tags_to_columns = time_series_settings.tags_to_columns; + for (const auto & tag_name_and_column_name : tags_to_columns) + { + const auto & tuple = tag_name_and_column_name.safeGet(); + const auto & column_name = tuple.at(1).safeGet(); + exp_list->children.push_back( + makeASTColumn(tags_table_id, column_name)); + } + + exp_list->children.push_back(makeASTColumn(tags_table_id, TimeSeriesColumnNames::Tags)); + + select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, exp_list); + } + + return select_query; + } + + /// Sorts a list of pairs {tag_name, tag_value} by tag name. + void sortLabelsByName(std::vector> & labels) + { + auto less_by_label_name = [](const std::pair & left, const std::pair & right) + { + return left.first < right.first; + }; + std::sort(labels.begin(), labels.end(), less_by_label_name); + } + + /// Sorts a list of pairs {timestamp, value} by timestamp. + void sortTimeSeriesByTimestamp(std::vector> & time_series) + { + auto less_by_timestamp = [](const std::pair & left, const std::pair & right) + { + return left.first < right.first; + }; + std::sort(time_series.begin(), time_series.end(), less_by_timestamp); + } + + /// Converts a block generated by the SELECT query for converting time series to the protobuf format. + void convertBlockToProtobuf( + Block && block, + google::protobuf::RepeatedPtrField & out_time_series, + const StorageID & time_series_storage_id, + const TimeSeriesSettings & time_series_settings) + { + size_t num_rows = block.rows(); + if (!num_rows) + return; + + size_t column_index = 0; + + /// We analyze columns sequentially. + auto get_next_column_with_type = [&] -> const ColumnWithTypeAndName & { return block.getByPosition(column_index++); }; + auto get_next_column = [&] -> const IColumn & { return *(get_next_column_with_type().column); }; + + /// Column "metric_name". + const auto & metric_name_column_with_type = get_next_column_with_type(); + TimeSeriesColumnsValidator validator{time_series_storage_id, time_series_settings}; + validator.validateColumnForMetricName(metric_name_column_with_type); + const auto & metric_name_column = *metric_name_column_with_type.column; + + /// Columns corresponding to specific tags specified in the "tags_to_columns" setting. + std::unordered_map column_by_tag_name; + const Map & tags_to_columns = time_series_settings.tags_to_columns; + for (const auto & tag_name_and_column_name : tags_to_columns) + { + const auto & tuple = tag_name_and_column_name.safeGet(); + const auto & tag_name = tuple.at(0).safeGet(); + const auto & column_with_type = get_next_column_with_type(); + validator.validateColumnForTagValue(column_with_type); + const auto & column = *column_with_type.column; + column_by_tag_name[tag_name] = &column; + } + + /// Column "tags". + const auto & tags_column_with_type = get_next_column_with_type(); + validator.validateColumnForTagsMap(tags_column_with_type); + const auto & tags_column = checkAndGetColumn(*tags_column_with_type.column); + const auto & tags_names = tags_column.getNestedData().getColumn(0); + const auto & tags_values = tags_column.getNestedData().getColumn(1); + const auto & tags_offsets = tags_column.getNestedColumn().getOffsets(); + + /// Column containing time series: groupArray(CAST(data_table.timestamp, 'DateTime64(3)'), CAST(data_table.value, 'Float64')) + const auto & time_series_column = checkAndGetColumn(get_next_column()); + const auto & time_series_timestamps = checkAndGetColumn>(checkAndGetColumn(time_series_column.getData()).getColumn(0)); + const auto & time_series_values = checkAndGetColumn(checkAndGetColumn(time_series_column.getData()).getColumn(1)); + const auto & time_series_offsets = time_series_column.getOffsets(); + + /// We will sort labels lexicographically and time series by timestamp before sending them to a client. + std::vector> labels; + std::vector> time_series; + + for (size_t i = 0; i != num_rows; ++i) + { + /// Collect labels. + size_t num_labels = 1; /* 1 for a metric name */ + + for (const auto & [_, column] : column_by_tag_name) + { + if (!column->isNullAt(i) && !column->getDataAt(i).empty()) + ++num_labels; + } + + size_t tags_start_offset = tags_offsets[i - 1]; + size_t tags_end_offset = tags_offsets[i]; + num_labels += tags_end_offset - tags_start_offset; + + labels.clear(); + labels.reserve(num_labels); + + labels.emplace_back(TimeSeriesTagNames::MetricName, metric_name_column.getDataAt(i)); + + for (const auto & [tag_name, column] : column_by_tag_name) + { + if (!column->isNullAt(i) && !column->getDataAt(i).empty()) + labels.emplace_back(tag_name, column->getDataAt(i)); + } + + for (size_t j = tags_start_offset; j != tags_end_offset; ++j) + { + std::string_view tag_name{tags_names.getDataAt(j)}; + std::string_view tag_value{tags_values.getDataAt(j)}; + labels.emplace_back(tag_name, tag_value); + } + + /// Sort labels. + sortLabelsByName(labels); + + /// Collect time series. + size_t time_series_start_offset = time_series_offsets[i - 1]; + size_t time_series_end_offset = time_series_offsets[i]; + size_t num_time_series = time_series_end_offset - time_series_start_offset; + + time_series.clear(); + time_series.reserve(num_time_series); + + for (size_t j = time_series_start_offset; j != time_series_end_offset; ++j) + time_series.emplace_back(time_series_timestamps.getElement(j), time_series_values.getElement(j)); + + /// Sort time series. + sortTimeSeriesByTimestamp(time_series); + + /// Prepare a result. + auto & new_time_series = *out_time_series.Add(); + + for (const auto & [label_name, label_value] : labels) + { + auto & new_label = *new_time_series.add_labels(); + new_label.set_name(label_name); + new_label.set_value(label_value); + } + + for (const auto & [timestamp, value] : time_series) + { + auto & new_sample = *new_time_series.add_samples(); + new_sample.set_timestamp(timestamp); + new_sample.set_value(value); + } + } + } +} + + +PrometheusRemoteReadProtocol::PrometheusRemoteReadProtocol(ConstStoragePtr time_series_storage_, const ContextPtr & context_) + : WithContext{context_} + , time_series_storage(storagePtrToTimeSeries(time_series_storage_)) + , log(getLogger("PrometheusRemoteReadProtocol")) +{ +} + +PrometheusRemoteReadProtocol::~PrometheusRemoteReadProtocol() = default; + +void PrometheusRemoteReadProtocol::readTimeSeries(google::protobuf::RepeatedPtrField & out_time_series, + Int64 start_timestamp_ms, + Int64 end_timestamp_ms, + const google::protobuf::RepeatedPtrField & label_matcher, + const prometheus::ReadHints &) +{ + out_time_series.Clear(); + + auto time_series_storage_id = time_series_storage->getStorageID(); + auto time_series_settings = time_series_storage->getStorageSettingsPtr(); + auto data_table_id = time_series_storage->getTargetTableId(ViewTarget::Data); + auto tags_table_id = time_series_storage->getTargetTableId(ViewTarget::Tags); + + ASTPtr select_query = buildSelectQueryForReadingTimeSeries( + start_timestamp_ms, end_timestamp_ms, label_matcher, *time_series_settings, data_table_id, tags_table_id); + + LOG_TRACE(log, "{}: Executing query {}", + time_series_storage_id.getNameForLogs(), select_query); + + InterpreterSelectQuery interpreter(select_query, getContext(), SelectQueryOptions{}); + BlockIO io = interpreter.execute(); + PullingPipelineExecutor executor(io.pipeline); + + Block block; + while (executor.pull(block)) + { + LOG_TRACE(log, "{}: Pulled block with {} columns and {} rows", + time_series_storage_id.getNameForLogs(), block.columns(), block.rows()); + + if (block) + convertBlockToProtobuf(std::move(block), out_time_series, time_series_storage_id, *time_series_settings); + } + + LOG_TRACE(log, "{}: {} time series read", + time_series_storage_id.getNameForLogs(), out_time_series.size()); +} + +} + +#endif diff --git a/src/Storages/TimeSeries/PrometheusRemoteReadProtocol.h b/src/Storages/TimeSeries/PrometheusRemoteReadProtocol.h new file mode 100644 index 00000000000..e10e1f8c8cf --- /dev/null +++ b/src/Storages/TimeSeries/PrometheusRemoteReadProtocol.h @@ -0,0 +1,36 @@ +#pragma once + +#include "config.h" +#if USE_PROMETHEUS_PROTOBUFS + +#include +#include +#include + + +namespace DB +{ +class StorageTimeSeries; + +/// Helper class to support the prometheus remote read protocol. +class PrometheusRemoteReadProtocol : public WithContext +{ +public: + PrometheusRemoteReadProtocol(ConstStoragePtr time_series_storage_, const ContextPtr & context_); + ~PrometheusRemoteReadProtocol(); + + /// Reads time series to send to client by remote read protocol. + void readTimeSeries(google::protobuf::RepeatedPtrField & out_time_series, + Int64 start_timestamp_ms, + Int64 end_timestamp_ms, + const google::protobuf::RepeatedPtrField & label_matcher, + const prometheus::ReadHints & read_hints); + +private: + std::shared_ptr time_series_storage; + Poco::LoggerPtr log; +}; + +} + +#endif diff --git a/src/Storages/TimeSeries/PrometheusRemoteWriteProtocol.cpp b/src/Storages/TimeSeries/PrometheusRemoteWriteProtocol.cpp new file mode 100644 index 00000000000..702d058ee79 --- /dev/null +++ b/src/Storages/TimeSeries/PrometheusRemoteWriteProtocol.cpp @@ -0,0 +1,601 @@ +#include + +#include "config.h" +#if USE_PROMETHEUS_PROTOBUFS + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TIME_SERIES_TAGS; + extern const int ILLEGAL_COLUMN; +} + + +namespace +{ + /// Checks that a specified set of labels is sorted and has no duplications, and there is one label named "__name__". + void checkLabels(const ::google::protobuf::RepeatedPtrField<::prometheus::Label> & labels) + { + bool metric_name_found = false; + for (size_t i = 0; i != static_cast(labels.size()); ++i) + { + const auto & label = labels[static_cast(i)]; + const auto & label_name = label.name(); + const auto & label_value = label.value(); + + if (label_name.empty()) + throw Exception(ErrorCodes::ILLEGAL_TIME_SERIES_TAGS, "Label name should not be empty"); + if (label_value.empty()) + continue; /// Empty label value is treated like the label doesn't exist. + + if (label_name == TimeSeriesTagNames::MetricName) + metric_name_found = true; + + if (i) + { + /// Check that labels are sorted. + const auto & previous_label_name = labels[static_cast(i - 1)].name(); + if (label_name <= previous_label_name) + { + if (label_name == previous_label_name) + throw Exception(ErrorCodes::ILLEGAL_TIME_SERIES_TAGS, "Found duplicate label {}", label_name); + else + throw Exception(ErrorCodes::ILLEGAL_TIME_SERIES_TAGS, "Label names are not sorted in lexicographical order ({} > {})", + previous_label_name, label_name); + } + } + } + + if (!metric_name_found) + throw Exception(ErrorCodes::ILLEGAL_TIME_SERIES_TAGS, "Metric name (label {}) not found", TimeSeriesTagNames::MetricName); + } + + /// Finds the description of an insertable column in the list. + const ColumnDescription & getInsertableColumnDescription(const ColumnsDescription & columns, const String & column_name, const StorageID & time_series_storage_id) + { + const ColumnDescription * column = columns.tryGet(column_name); + if (!column || ((column->default_desc.kind != ColumnDefaultKind::Default) && (column->default_desc.kind != ColumnDefaultKind::Ephemeral))) + { + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "{}: Column {} {}", + time_series_storage_id.getNameForLogs(), column_name, column ? "non-insertable" : "doesn't exist"); + } + return *column; + } + + /// Calculates the identifier of each time series in "tags_block" using the default expression for the "id" column, + /// and adds column "id" with the results to "tags_block". + IColumn & calculateId(const ContextPtr & context, const ColumnDescription & id_column_description, Block & tags_block) + { + auto blocks = std::make_shared(); + blocks->push_back(tags_block); + + auto header = tags_block.cloneEmpty(); + auto pipe = Pipe(std::make_shared(blocks, header)); + + Block header_with_id; + const auto & id_name = id_column_description.name; + auto id_type = id_column_description.type; + header_with_id.insert(ColumnWithTypeAndName{id_type, id_name}); + + auto adding_missing_defaults_dag = addMissingDefaults( + pipe.getHeader(), + header_with_id.getNamesAndTypesList(), + ColumnsDescription{id_column_description}, + context); + + auto adding_missing_defaults_actions = std::make_shared(std::move(adding_missing_defaults_dag)); + pipe.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared(stream_header, adding_missing_defaults_actions); + }); + + auto convert_actions_dag = ActionsDAG::makeConvertingActions( + pipe.getHeader().getColumnsWithTypeAndName(), + header_with_id.getColumnsWithTypeAndName(), + ActionsDAG::MatchColumnsMode::Position); + auto actions = std::make_shared( + std::move(convert_actions_dag), + ExpressionActionsSettings::fromContext(context, CompileExpressions::yes)); + pipe.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared(stream_header, actions); + }); + + QueryPipeline pipeline{std::move(pipe)}; + PullingPipelineExecutor executor{pipeline}; + + MutableColumnPtr id_column; + + Block block_from_executor; + while (executor.pull(block_from_executor)) + { + if (block_from_executor) + { + MutableColumnPtr id_column_part = block_from_executor.getByName(id_name).column->assumeMutable(); + if (id_column) + id_column->insertRangeFrom(*id_column_part, 0, id_column_part->size()); + else + id_column = std::move(id_column_part); + } + } + + if (!id_column) + id_column = id_type->createColumn(); + + IColumn & id_column_ref = *id_column; + tags_block.insert(0, ColumnWithTypeAndName{std::move(id_column), id_type, id_name}); + return id_column_ref; + } + + /// Converts a timestamp in milliseconds to a DateTime64 with a specified scale. + DateTime64 scaleTimestamp(Int64 timestamp_ms, UInt32 scale) + { + if (scale == 3) + return timestamp_ms; + else if (scale > 3) + return timestamp_ms * DecimalUtils::scaleMultiplier(scale - 3); + else + return timestamp_ms / DecimalUtils::scaleMultiplier(3 - scale); + } + + /// Finds min time and max time in a time series. + std::pair findMinTimeAndMaxTime(const google::protobuf::RepeatedPtrField & samples) + { + chassert(!samples.empty()); + Int64 min_time = std::numeric_limits::max(); + Int64 max_time = std::numeric_limits::min(); + for (const auto & sample : samples) + { + Int64 timestamp = sample.timestamp(); + if (timestamp < min_time) + min_time = timestamp; + if (timestamp > max_time) + max_time = timestamp; + } + return {min_time, max_time}; + } + + struct BlocksToInsert + { + std::vector> blocks; + }; + + /// Converts time series from the protobuf format to prepared blocks for inserting into target tables. + BlocksToInsert toBlocks(const google::protobuf::RepeatedPtrField & time_series, + const ContextPtr & context, + const StorageID & time_series_storage_id, + const StorageInMemoryMetadata & time_series_storage_metadata, + const TimeSeriesSettings & time_series_settings) + { + size_t num_tags_rows = time_series.size(); + + size_t num_data_rows = 0; + for (const auto & element : time_series) + num_data_rows += element.samples_size(); + + if (!num_data_rows) + return {}; /// Nothing to insert into target tables. + + /// Column types must be extracted from the target tables' metadata. + const auto & columns_description = time_series_storage_metadata.columns; + + auto get_column_description = [&](const String & column_name) -> const ColumnDescription & + { + return getInsertableColumnDescription(columns_description, column_name, time_series_storage_id); + }; + + /// We're going to prepare two blocks - one for the "data" table, and one for the "tags" table. + Block data_block, tags_block; + + auto make_column_for_data_block = [&](const ColumnDescription & column_description) -> IColumn & + { + auto column = column_description.type->createColumn(); + column->reserve(num_data_rows); + auto * column_ptr = column.get(); + data_block.insert(ColumnWithTypeAndName{std::move(column), column_description.type, column_description.name}); + return *column_ptr; + }; + + auto make_column_for_tags_block = [&](const ColumnDescription & column_description) -> IColumn & + { + auto column = column_description.type->createColumn(); + column->reserve(num_tags_rows); + auto * column_ptr = column.get(); + tags_block.insert(ColumnWithTypeAndName{std::move(column), column_description.type, column_description.name}); + return *column_ptr; + }; + + /// Create columns. + + /// Column "id". + const auto & id_description = get_column_description(TimeSeriesColumnNames::ID); + TimeSeriesColumnsValidator validator{time_series_storage_id, time_series_settings}; + validator.validateColumnForID(id_description); + auto & id_column_in_data_table = make_column_for_data_block(id_description); + + /// Column "timestamp". + const auto & timestamp_description = get_column_description(TimeSeriesColumnNames::Timestamp); + UInt32 timestamp_scale; + validator.validateColumnForTimestamp(timestamp_description, timestamp_scale); + auto & timestamp_column = make_column_for_data_block(timestamp_description); + + /// Column "value". + const auto & value_description = get_column_description(TimeSeriesColumnNames::Value); + validator.validateColumnForValue(value_description); + auto & value_column = make_column_for_data_block(value_description); + + /// Column "metric_name". + const auto & metric_name_description = get_column_description(TimeSeriesColumnNames::MetricName); + validator.validateColumnForMetricName(metric_name_description); + auto & metric_name_column = make_column_for_tags_block(metric_name_description); + + /// Columns we should check explicitly that they're filled after filling each row. + std::vector columns_to_fill_in_tags_table; + + /// Columns corresponding to specific tags specified in the "tags_to_columns" setting. + std::unordered_map columns_by_tag_name; + const Map & tags_to_columns = time_series_settings.tags_to_columns; + for (const auto & tag_name_and_column_name : tags_to_columns) + { + const auto & tuple = tag_name_and_column_name.safeGet(); + const auto & tag_name = tuple.at(0).safeGet(); + const auto & column_name = tuple.at(1).safeGet(); + const auto & column_description = get_column_description(column_name); + validator.validateColumnForTagValue(column_description); + auto & column = make_column_for_tags_block(column_description); + columns_by_tag_name[tag_name] = &column; + columns_to_fill_in_tags_table.emplace_back(&column); + } + + /// Column "tags". + const auto & tags_description = get_column_description(TimeSeriesColumnNames::Tags); + validator.validateColumnForTagsMap(tags_description); + auto & tags_column = typeid_cast(make_column_for_tags_block(tags_description)); + IColumn & tags_names = tags_column.getNestedData().getColumn(0); + IColumn & tags_values = tags_column.getNestedData().getColumn(1); + auto & tags_offsets = tags_column.getNestedColumn().getOffsets(); + + /// Column "all_tags". + IColumn * all_tags_names = nullptr; + IColumn * all_tags_values = nullptr; + IColumn::Offsets * all_tags_offsets = nullptr; + if (time_series_settings.use_all_tags_column_to_generate_id) + { + const auto & all_tags_description = get_column_description(TimeSeriesColumnNames::AllTags); + validator.validateColumnForTagsMap(all_tags_description); + auto & all_tags_column = typeid_cast(make_column_for_tags_block(all_tags_description)); + all_tags_names = &all_tags_column.getNestedData().getColumn(0); + all_tags_values = &all_tags_column.getNestedData().getColumn(1); + all_tags_offsets = &all_tags_column.getNestedColumn().getOffsets(); + } + + /// Columns "min_time" and "max_time". + IColumn * min_time_column = nullptr; + IColumn * max_time_column = nullptr; + UInt32 min_time_scale = 0; + UInt32 max_time_scale = 0; + if (time_series_settings.store_min_time_and_max_time) + { + const auto & min_time_description = get_column_description(TimeSeriesColumnNames::MinTime); + const auto & max_time_description = get_column_description(TimeSeriesColumnNames::MaxTime); + validator.validateColumnForTimestamp(min_time_description, min_time_scale); + validator.validateColumnForTimestamp(max_time_description, max_time_scale); + min_time_column = &make_column_for_tags_block(min_time_description); + max_time_column = &make_column_for_tags_block(max_time_description); + columns_to_fill_in_tags_table.emplace_back(min_time_column); + columns_to_fill_in_tags_table.emplace_back(max_time_column); + } + + /// Prepare a block for inserting into the "tags" table. + size_t current_row_in_tags = 0; + for (size_t i = 0; i != static_cast(time_series.size()); ++i) + { + const auto & element = time_series[static_cast(i)]; + if (!element.samples_size()) + continue; + + const auto & labels = element.labels(); + checkLabels(labels); + + for (size_t j = 0; j != static_cast(labels.size()); ++j) + { + const auto & label = labels[static_cast(j)]; + const auto & tag_name = label.name(); + const auto & tag_value = label.value(); + + if (tag_name == TimeSeriesTagNames::MetricName) + { + metric_name_column.insertData(tag_value.data(), tag_value.length()); + } + else + { + if (time_series_settings.use_all_tags_column_to_generate_id) + { + all_tags_names->insertData(tag_name.data(), tag_name.length()); + all_tags_values->insertData(tag_value.data(), tag_value.length()); + } + + auto it = columns_by_tag_name.find(tag_name); + bool has_column_for_tag_value = (it != columns_by_tag_name.end()); + if (has_column_for_tag_value) + { + auto * column = it->second; + column->insertData(tag_value.data(), tag_value.length()); + } + else + { + tags_names.insertData(tag_name.data(), tag_name.length()); + tags_values.insertData(tag_value.data(), tag_value.length()); + } + } + } + + tags_offsets.push_back(tags_names.size()); + + if (time_series_settings.use_all_tags_column_to_generate_id) + all_tags_offsets->push_back(all_tags_names->size()); + + if (time_series_settings.store_min_time_and_max_time) + { + auto [min_time, max_time] = findMinTimeAndMaxTime(element.samples()); + min_time_column->insert(scaleTimestamp(min_time, min_time_scale)); + max_time_column->insert(scaleTimestamp(max_time, max_time_scale)); + } + + for (auto * column : columns_to_fill_in_tags_table) + { + if (column->size() == current_row_in_tags) + column->insertDefault(); + } + + ++current_row_in_tags; + } + + /// Calculate an identifier for each time series, make a new column from those identifiers, and add it to "tags_block". + auto & id_column_in_tags_table = calculateId(context, columns_description.get(TimeSeriesColumnNames::ID), tags_block); + + /// Prepare a block for inserting to the "data" table. + current_row_in_tags = 0; + for (size_t i = 0; i != static_cast(time_series.size()); ++i) + { + const auto & element = time_series[static_cast(i)]; + if (!element.samples_size()) + continue; + + id_column_in_data_table.insertManyFrom(id_column_in_tags_table, current_row_in_tags, element.samples_size()); + for (const auto & sample : element.samples()) + { + timestamp_column.insert(scaleTimestamp(sample.timestamp(), timestamp_scale)); + value_column.insert(sample.value()); + } + + ++current_row_in_tags; + } + + /// The "all_tags" column in the "tags" table is either ephemeral or doesn't exists. + /// We've used the "all_tags" column to calculate the "id" column already, + /// and now we don't need it to insert to the "tags" table. + tags_block.erase(TimeSeriesColumnNames::AllTags); + + BlocksToInsert res; + + /// A block to the "tags" table should be inserted first. + /// (Because any INSERT can fail and we don't want to have rows in the data table with no corresponding "id" written to the "tags" table.) + res.blocks.emplace_back(ViewTarget::Tags, std::move(tags_block)); + res.blocks.emplace_back(ViewTarget::Data, std::move(data_block)); + + return res; + } + + std::string_view metricTypeToString(prometheus::MetricMetadata::MetricType metric_type) + { + using namespace std::literals; + switch (metric_type) + { + case prometheus::MetricMetadata::UNKNOWN: return "unknown"sv; + case prometheus::MetricMetadata::COUNTER: return "counter"sv; + case prometheus::MetricMetadata::GAUGE: return "gauge"sv; + case prometheus::MetricMetadata::HISTOGRAM: return "histogram"sv; + case prometheus::MetricMetadata::GAUGEHISTOGRAM: return "gaugehistogram"sv; + case prometheus::MetricMetadata::SUMMARY: return "summary"sv; + case prometheus::MetricMetadata::INFO: return "info"sv; + case prometheus::MetricMetadata::STATESET: return "stateset"sv; + default: break; + } + return ""; + } + + /// Converts metrics metadata from the protobuf format to prepared blocks for inserting into target tables. + BlocksToInsert toBlocks(const google::protobuf::RepeatedPtrField & metrics_metadata, + const StorageID & time_series_storage_id, + const StorageInMemoryMetadata & time_series_storage_metadata, + const TimeSeriesSettings & time_series_settings) + { + size_t num_rows = metrics_metadata.size(); + + if (!num_rows) + return {}; /// Nothing to insert into target tables. + + /// Column types must be extracted from the target tables' metadata. + const auto & columns_description = time_series_storage_metadata.columns; + + auto get_column_description = [&](const String & column_name) -> const ColumnDescription & + { + return getInsertableColumnDescription(columns_description, column_name, time_series_storage_id); + }; + + /// We're going to prepare one blocks for the "metrics" table. + Block block; + + auto make_column = [&](const ColumnDescription & column_description) -> IColumn & + { + auto column = column_description.type->createColumn(); + column->reserve(num_rows); + auto * column_ptr = column.get(); + block.insert(ColumnWithTypeAndName{std::move(column), column_description.type, column_description.name}); + return *column_ptr; + }; + + /// Create columns. + + /// Column "metric_family_name". + const auto & metric_family_name_description = get_column_description(TimeSeriesColumnNames::MetricFamilyName); + TimeSeriesColumnsValidator validator{time_series_storage_id, time_series_settings}; + validator.validateColumnForMetricFamilyName(metric_family_name_description); + auto & metric_family_name_column = make_column(metric_family_name_description); + + /// Column "type". + const auto & type_description = get_column_description(TimeSeriesColumnNames::Type); + validator.validateColumnForType(type_description); + auto & type_column = make_column(type_description); + + /// Column "unit". + const auto & unit_description = get_column_description(TimeSeriesColumnNames::Unit); + validator.validateColumnForUnit(unit_description); + auto & unit_column = make_column(unit_description); + + /// Column "help". + const auto & help_description = get_column_description(TimeSeriesColumnNames::Help); + validator.validateColumnForHelp(help_description); + auto & help_column = make_column(help_description); + + /// Fill those columns. + for (const auto & element : metrics_metadata) + { + const auto & metric_family_name = element.metric_family_name(); + const auto & type_str = metricTypeToString(element.type()); + const auto & help = element.help(); + const auto & unit = element.unit(); + + metric_family_name_column.insertData(metric_family_name.data(), metric_family_name.length()); + type_column.insertData(type_str.data(), type_str.length()); + unit_column.insertData(unit.data(), unit.length()); + help_column.insertData(help.data(), help.length()); + } + + /// Prepare a result. + BlocksToInsert res; + res.blocks.emplace_back(ViewTarget::Metrics, std::move(block)); + return res; + } + + /// Inserts blocks to target tables. + void insertToTargetTables(BlocksToInsert && blocks, StorageTimeSeries & time_series_storage, ContextPtr context, Poco::Logger * log) + { + auto time_series_storage_id = time_series_storage.getStorageID(); + + for (auto & [table_kind, block] : blocks.blocks) + { + if (block) + { + const auto & target_table_id = time_series_storage.getTargetTableId(table_kind); + + LOG_INFO(log, "{}: Inserting {} rows to the {} table", + time_series_storage_id.getNameForLogs(), block.rows(), toString(table_kind)); + + auto insert_query = std::make_shared(); + insert_query->table_id = target_table_id; + + auto columns_ast = std::make_shared(); + for (const auto & name : block.getNames()) + columns_ast->children.emplace_back(std::make_shared(name)); + insert_query->columns = columns_ast; + + ContextMutablePtr insert_context = Context::createCopy(context); + insert_context->setCurrentQueryId(context->getCurrentQueryId() + ":" + String{toString(table_kind)}); + + LOG_TEST(log, "{}: Executing query: {}", time_series_storage_id.getNameForLogs(), queryToString(insert_query)); + + InterpreterInsertQuery interpreter( + insert_query, + insert_context, + /* allow_materialized= */ false, + /* no_squash= */ false, + /* no_destination= */ false, + /* async_insert= */ false); + + BlockIO io = interpreter.execute(); + PushingPipelineExecutor executor(io.pipeline); + + executor.start(); + executor.push(std::move(block)); + executor.finish(); + } + } + } +} + + +PrometheusRemoteWriteProtocol::PrometheusRemoteWriteProtocol(StoragePtr time_series_storage_, const ContextPtr & context_) + : WithContext(context_) + , time_series_storage(storagePtrToTimeSeries(time_series_storage_)) + , log(getLogger("PrometheusRemoteWriteProtocol")) +{ +} + +PrometheusRemoteWriteProtocol::~PrometheusRemoteWriteProtocol() = default; + + +void PrometheusRemoteWriteProtocol::writeTimeSeries(const google::protobuf::RepeatedPtrField & time_series) +{ + auto time_series_storage_id = time_series_storage->getStorageID(); + + LOG_TRACE(log, "{}: Writing {} time series", + time_series_storage_id.getNameForLogs(), time_series.size()); + + auto time_series_storage_metadata = time_series_storage->getInMemoryMetadataPtr(); + auto time_series_settings = time_series_storage->getStorageSettingsPtr(); + + auto blocks = toBlocks(time_series, getContext(), time_series_storage_id, *time_series_storage_metadata, *time_series_settings); + insertToTargetTables(std::move(blocks), *time_series_storage, getContext(), log.get()); + + LOG_TRACE(log, "{}: {} time series written", + time_series_storage_id.getNameForLogs(), time_series.size()); +} + +void PrometheusRemoteWriteProtocol::writeMetricsMetadata(const google::protobuf::RepeatedPtrField & metrics_metadata) +{ + auto time_series_storage_id = time_series_storage->getStorageID(); + + LOG_TRACE(log, "{}: Writing {} metrics metadata", + time_series_storage_id.getNameForLogs(), metrics_metadata.size()); + + auto time_series_storage_metadata = time_series_storage->getInMemoryMetadataPtr(); + auto time_series_settings = time_series_storage->getStorageSettingsPtr(); + + auto blocks = toBlocks(metrics_metadata, time_series_storage_id, *time_series_storage_metadata, *time_series_settings); + insertToTargetTables(std::move(blocks), *time_series_storage, getContext(), log.get()); + + LOG_TRACE(log, "{}: {} metrics metadata written", + time_series_storage_id.getNameForLogs(), metrics_metadata.size()); +} + +} + +#endif diff --git a/src/Storages/TimeSeries/PrometheusRemoteWriteProtocol.h b/src/Storages/TimeSeries/PrometheusRemoteWriteProtocol.h new file mode 100644 index 00000000000..24c65e96cbe --- /dev/null +++ b/src/Storages/TimeSeries/PrometheusRemoteWriteProtocol.h @@ -0,0 +1,35 @@ +#pragma once + +#include "config.h" +#if USE_PROMETHEUS_PROTOBUFS + +#include +#include +#include + + +namespace DB +{ +class StorageTimeSeries; + +/// Helper class to support the prometheus remote write protocol. +class PrometheusRemoteWriteProtocol : WithContext +{ +public: + PrometheusRemoteWriteProtocol(StoragePtr time_series_storage_, const ContextPtr & context_); + ~PrometheusRemoteWriteProtocol(); + + /// Insert time series received by remote write protocol to our table. + void writeTimeSeries(const google::protobuf::RepeatedPtrField & time_series); + + /// Insert metrics metadata received by remote write protocol to our table. + void writeMetricsMetadata(const google::protobuf::RepeatedPtrField & metrics_metadata); + +private: + std::shared_ptr time_series_storage; + Poco::LoggerPtr log; +}; + +} + +#endif diff --git a/src/Storages/TimeSeries/TimeSeriesColumnNames.h b/src/Storages/TimeSeries/TimeSeriesColumnNames.h new file mode 100644 index 00000000000..d7b12fdeea8 --- /dev/null +++ b/src/Storages/TimeSeries/TimeSeriesColumnNames.h @@ -0,0 +1,38 @@ +#pragma once + + +namespace DB +{ + +struct TimeSeriesColumnNames +{ + /// The "data" table contains time series: + static constexpr const char * ID = "id"; + static constexpr const char * Timestamp = "timestamp"; + static constexpr const char * Value = "value"; + + /// The "tags" table contains identifiers for each combination of a metric name with corresponding tags (labels): + + /// The default expression specified for the "id" column contains an expression for calculating an identifier of a time series by a metric name and tags. + //static constexpr const char * kID = "id"; + static constexpr const char * MetricName = "metric_name"; + + /// Contains tags which have no corresponding columns specified in the "tags_to_columns" setting. + static constexpr const char * Tags = "tags"; + + /// Contains all tags, including those ones which have corresponding columns specified in the "tags_to_columns" setting. + /// This is a generated column, it's not stored anywhere, it's generated on the fly. + static constexpr const char * AllTags = "all_tags"; + + /// Contains the time range of a time series. + static constexpr const char * MinTime = "min_time"; + static constexpr const char * MaxTime = "max_time"; + + /// The "metrics" table contains general information (metadata) about metrics: + static constexpr const char * MetricFamilyName = "metric_family_name"; + static constexpr const char * Type = "type"; + static constexpr const char * Unit = "unit"; + static constexpr const char * Help = "help"; +}; + +} diff --git a/src/Storages/TimeSeries/TimeSeriesColumnsValidator.cpp b/src/Storages/TimeSeries/TimeSeriesColumnsValidator.cpp new file mode 100644 index 00000000000..a2308857e2e --- /dev/null +++ b/src/Storages/TimeSeries/TimeSeriesColumnsValidator.cpp @@ -0,0 +1,272 @@ +#include + +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; + extern const int INCOMPATIBLE_COLUMNS; + extern const int THERE_IS_NO_COLUMN; +} + + +TimeSeriesColumnsValidator::TimeSeriesColumnsValidator(StorageID time_series_storage_id_, + std::reference_wrapper time_series_settings_) + : time_series_storage_id(std::move(time_series_storage_id_)) + , time_series_settings(time_series_settings_) +{ +} + + +void TimeSeriesColumnsValidator::validateColumns(const ColumnsDescription & columns) const +{ + try + { + validateColumnsImpl(columns); + } + catch (Exception & e) + { + e.addMessage("While checking columns of TimeSeries table {}", time_series_storage_id.getNameForLogs()); + throw; + } +} + + +void TimeSeriesColumnsValidator::validateColumnsImpl(const ColumnsDescription & columns) const +{ + + auto get_column_description = [&](const String & column_name) -> const ColumnDescription & + { + const auto * column = columns.tryGet(column_name); + if (!column) + { + throw Exception(ErrorCodes::THERE_IS_NO_COLUMN, "Column {} is required for the TimeSeries table engine", column_name); + } + return *column; + }; + + /// Validate columns for the "data" table. + validateColumnForID(get_column_description(TimeSeriesColumnNames::ID)); + validateColumnForTimestamp(get_column_description(TimeSeriesColumnNames::Timestamp)); + validateColumnForValue(get_column_description(TimeSeriesColumnNames::Value)); + + /// Validate columns for the "tags" table. + validateColumnForMetricName(get_column_description(TimeSeriesColumnNames::MetricName)); + + const Map & tags_to_columns = time_series_settings.tags_to_columns; + for (const auto & tag_name_and_column_name : tags_to_columns) + { + const auto & tuple = tag_name_and_column_name.safeGet(); + const auto & column_name = tuple.at(1).safeGet(); + validateColumnForTagValue(get_column_description(column_name)); + } + + validateColumnForTagsMap(get_column_description(TimeSeriesColumnNames::Tags)); + validateColumnForTagsMap(get_column_description(TimeSeriesColumnNames::AllTags)); + + /// Validate columns for the "metrics" table. + validateColumnForMetricFamilyName(get_column_description(TimeSeriesColumnNames::MetricFamilyName)); + validateColumnForType(get_column_description(TimeSeriesColumnNames::Type)); + validateColumnForUnit(get_column_description(TimeSeriesColumnNames::Unit)); + validateColumnForHelp(get_column_description(TimeSeriesColumnNames::Help)); +} + + +void TimeSeriesColumnsValidator::validateTargetColumns(ViewTarget::Kind target_kind, const StorageID & target_table_id, const ColumnsDescription & target_columns) const +{ + try + { + validateTargetColumnsImpl(target_kind, target_columns); + } + catch (Exception & e) + { + e.addMessage("While checking columns of table {} which is the {} target of TimeSeries table {}", target_table_id.getNameForLogs(), + toString(target_kind), time_series_storage_id.getNameForLogs()); + throw; + } +} + + +void TimeSeriesColumnsValidator::validateTargetColumnsImpl(ViewTarget::Kind target_kind, const ColumnsDescription & target_columns) const +{ + auto get_column_description = [&](const String & column_name) -> const ColumnDescription & + { + const auto * column = target_columns.tryGet(column_name); + if (!column) + { + throw Exception(ErrorCodes::THERE_IS_NO_COLUMN, "Column {} is required for the TimeSeries table engine", column_name); + } + return *column; + }; + + switch (target_kind) + { + case ViewTarget::Data: + { + /// Here "check_default = false" because it's ok for the "id" column in the target table not to contain + /// an expression for calculating the identifier of a time series. + validateColumnForID(get_column_description(TimeSeriesColumnNames::ID), /* check_default= */ false); + + validateColumnForTimestamp(get_column_description(TimeSeriesColumnNames::Timestamp)); + validateColumnForValue(get_column_description(TimeSeriesColumnNames::Value)); + + break; + } + + case ViewTarget::Tags: + { + validateColumnForMetricName(get_column_description(TimeSeriesColumnNames::MetricName)); + + const Map & tags_to_columns = time_series_settings.tags_to_columns; + for (const auto & tag_name_and_column_name : tags_to_columns) + { + const auto & tuple = tag_name_and_column_name.safeGet(); + const auto & column_name = tuple.at(1).safeGet(); + validateColumnForTagValue(get_column_description(column_name)); + } + + validateColumnForTagsMap(get_column_description(TimeSeriesColumnNames::Tags)); + + break; + } + + case ViewTarget::Metrics: + { + validateColumnForMetricFamilyName(get_column_description(TimeSeriesColumnNames::MetricFamilyName)); + validateColumnForType(get_column_description(TimeSeriesColumnNames::Type)); + validateColumnForUnit(get_column_description(TimeSeriesColumnNames::Unit)); + validateColumnForHelp(get_column_description(TimeSeriesColumnNames::Help)); + break; + } + + default: + UNREACHABLE(); + } +} + + +void TimeSeriesColumnsValidator::validateColumnForID(const ColumnDescription & column, bool check_default) const +{ + if (check_default && !column.default_desc.expression) + { + throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS, "The DEFAULT expression for column {} must contain an expression " + "which will be used to calculate the identifier of each time series: {} {} DEFAULT ...", + column.name, column.name, column.type->getName()); + } +} + +void TimeSeriesColumnsValidator::validateColumnForTimestamp(const ColumnDescription & column) const +{ + if (!isDateTime64(removeNullable(column.type))) + { + throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS, "Column {} has illegal data type {}, expected DateTime64", + column.name, column.type->getName()); + } +} + +void TimeSeriesColumnsValidator::validateColumnForTimestamp(const ColumnDescription & column, UInt32 & out_scale) const +{ + auto maybe_datetime64_type = removeNullable(column.type); + if (!isDateTime64(maybe_datetime64_type)) + { + throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS, "Column {} has illegal data type {}, expected DateTime64", + column.name, column.type->getName()); + } + const auto & datetime64_type = typeid_cast(*maybe_datetime64_type); + out_scale = datetime64_type.getScale(); +} + +void TimeSeriesColumnsValidator::validateColumnForValue(const ColumnDescription & column) const +{ + if (!isFloat(removeNullable(column.type))) + { + throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS, "Column {} has illegal data type {}, expected Float32 or Float64", + column.name, column.type->getName()); + } +} + +void TimeSeriesColumnsValidator::validateColumnForMetricName(const ColumnDescription & column) const +{ + validateColumnForTagValue(column); +} + +void TimeSeriesColumnsValidator::validateColumnForMetricName(const ColumnWithTypeAndName & column) const +{ + validateColumnForTagValue(column); +} + +void TimeSeriesColumnsValidator::validateColumnForTagValue(const ColumnDescription & column) const +{ + if (!isString(removeLowCardinalityAndNullable(column.type))) + { + throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS, "Column {} has illegal data type {}, expected String or LowCardinality(String)", + column.name, column.type->getName()); + } +} + +void TimeSeriesColumnsValidator::validateColumnForTagValue(const ColumnWithTypeAndName & column) const +{ + if (!isString(removeLowCardinalityAndNullable(column.type))) + { + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column {} has illegal data type {}, expected String or LowCardinality(String)", + column.name, column.type->getName()); + } +} + +void TimeSeriesColumnsValidator::validateColumnForTagsMap(const ColumnDescription & column) const +{ + if (!isMap(column.type) + || !isString(removeLowCardinality(typeid_cast(*column.type).getKeyType())) + || !isString(removeLowCardinality(typeid_cast(*column.type).getValueType()))) + { + throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS, "Column {} has illegal data type {}, expected Map(String, String) or Map(LowCardinality(String), String)", + column.name, column.type->getName()); + } +} + +void TimeSeriesColumnsValidator::validateColumnForTagsMap(const ColumnWithTypeAndName & column) const +{ + if (!isMap(column.type) + || !isString(removeLowCardinality(typeid_cast(*column.type).getKeyType())) + || !isString(removeLowCardinality(typeid_cast(*column.type).getValueType()))) + { + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column {} has illegal data type {}, expected Map(String, String) or Map(LowCardinality(String), String)", + column.name, column.type->getName()); + } +} + +void TimeSeriesColumnsValidator::validateColumnForMetricFamilyName(const ColumnDescription & column) const +{ + if (!isString(removeLowCardinalityAndNullable(column.type))) + { + throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS, "Column {} has illegal data type {}, expected String or LowCardinality(String)", + column.name, column.type->getName()); + } +} + +void TimeSeriesColumnsValidator::validateColumnForType(const ColumnDescription & column) const +{ + validateColumnForMetricFamilyName(column); +} + +void TimeSeriesColumnsValidator::validateColumnForUnit(const ColumnDescription & column) const +{ + validateColumnForMetricFamilyName(column); +} + +void TimeSeriesColumnsValidator::validateColumnForHelp(const ColumnDescription & column) const +{ + validateColumnForMetricFamilyName(column); +} + +} diff --git a/src/Storages/TimeSeries/TimeSeriesColumnsValidator.h b/src/Storages/TimeSeries/TimeSeriesColumnsValidator.h new file mode 100644 index 00000000000..43a54bf2ad6 --- /dev/null +++ b/src/Storages/TimeSeries/TimeSeriesColumnsValidator.h @@ -0,0 +1,55 @@ +#pragma once + +#include +#include + + +namespace DB +{ +class ColumnsDescription; +struct ColumnDescription; +struct ColumnWithTypeAndName; +struct TimeSeriesSettings; + +/// Checks the types of columns of a TimeSeries table. +class TimeSeriesColumnsValidator +{ +public: + /// Constructor stores a reference to argument `time_series_settings_` (it's unnecessary to copy it). + TimeSeriesColumnsValidator(StorageID time_series_storage_id_, + std::reference_wrapper time_series_settings_); + + /// Checks the columns of a TimeSeries table and throws an exception if some of the required columns don't exist or have illegal types. + void validateColumns(const ColumnsDescription & columns) const; + + /// Checks columns of a target table that a TimeSeries table is going to use. + /// Throws an exception if some of the required columns don't exist or have illegal types. + void validateTargetColumns(ViewTarget::Kind target_kind, const StorageID & target_table_id, const ColumnsDescription & target_columns) const; + + /// Each of the following functions validates a specific column type. + void validateColumnForID(const ColumnDescription & column, bool check_default = true) const; + void validateColumnForTimestamp(const ColumnDescription & column) const; + void validateColumnForTimestamp(const ColumnDescription & column, UInt32 & out_scale) const; + void validateColumnForValue(const ColumnDescription & column) const; + + void validateColumnForMetricName(const ColumnDescription & column) const; + void validateColumnForMetricName(const ColumnWithTypeAndName & column) const; + void validateColumnForTagValue(const ColumnDescription & column) const; + void validateColumnForTagValue(const ColumnWithTypeAndName & column) const; + void validateColumnForTagsMap(const ColumnDescription & column) const; + void validateColumnForTagsMap(const ColumnWithTypeAndName & column) const; + + void validateColumnForMetricFamilyName(const ColumnDescription & column) const; + void validateColumnForType(const ColumnDescription & column) const; + void validateColumnForUnit(const ColumnDescription & column) const; + void validateColumnForHelp(const ColumnDescription & column) const; + +private: + void validateColumnsImpl(const ColumnsDescription & columns) const; + void validateTargetColumnsImpl(ViewTarget::Kind target_kind, const ColumnsDescription & target_columns) const; + + const StorageID time_series_storage_id; + const TimeSeriesSettings & time_series_settings; +}; + +} diff --git a/src/Storages/TimeSeries/TimeSeriesDefinitionNormalizer.cpp b/src/Storages/TimeSeries/TimeSeriesDefinitionNormalizer.cpp new file mode 100644 index 00000000000..746a6a28274 --- /dev/null +++ b/src/Storages/TimeSeries/TimeSeriesDefinitionNormalizer.cpp @@ -0,0 +1,473 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int INCOMPATIBLE_COLUMNS; + extern const int INCORRECT_QUERY; +} + + +TimeSeriesDefinitionNormalizer::TimeSeriesDefinitionNormalizer(StorageID time_series_storage_id_, + std::reference_wrapper time_series_settings_, + const ASTCreateQuery * as_create_query_) + : time_series_storage_id(std::move(time_series_storage_id_)) + , time_series_settings(time_series_settings_) + , as_create_query(as_create_query_) +{ +} + + +void TimeSeriesDefinitionNormalizer::normalize(ASTCreateQuery & create_query) const +{ + reorderColumns(create_query); + addMissingColumns(create_query); + addMissingDefaultForIDColumn(create_query); + + if (as_create_query) + addMissingInnerEnginesFromAsTable(create_query); + + addMissingInnerEngines(create_query); +} + + +void TimeSeriesDefinitionNormalizer::reorderColumns(ASTCreateQuery & create) const +{ + if (!create.columns_list || !create.columns_list->columns) + return; + + auto & columns = create.columns_list->columns->children; + + /// Build a map "column_name -> column_declaration". + std::unordered_map> columns_by_name; + for (const auto & column : columns) + { + auto column_declaration = typeid_cast>(column); + columns_by_name[column_declaration->name] = column_declaration; + } + + /// Remove all columns and then add them again in the canonical order. + columns.clear(); + + auto add_column_in_correct_order = [&](std::string_view column_name) + { + auto it = columns_by_name.find(column_name); + if (it != columns_by_name.end()) + { + /// Add the column back to the list. + columns.push_back(it->second); + + /// Remove the column from the map to allow the check at the end of this function + /// that all columns from the original list are added back to the list. + columns_by_name.erase(it); + } + }; + + /// Reorder columns for the "data" table. + add_column_in_correct_order(TimeSeriesColumnNames::ID); + add_column_in_correct_order(TimeSeriesColumnNames::Timestamp); + add_column_in_correct_order(TimeSeriesColumnNames::Value); + + /// Reorder columns for the "tags" table. + add_column_in_correct_order(TimeSeriesColumnNames::MetricName); + + const Map & tags_to_columns = time_series_settings.tags_to_columns; + for (const auto & tag_name_and_column_name : tags_to_columns) + { + const auto & tuple = tag_name_and_column_name.safeGet(); + const auto & column_name = tuple.at(1).safeGet(); + add_column_in_correct_order(column_name); + } + + add_column_in_correct_order(TimeSeriesColumnNames::Tags); + add_column_in_correct_order(TimeSeriesColumnNames::AllTags); + + if (time_series_settings.store_min_time_and_max_time) + { + add_column_in_correct_order(TimeSeriesColumnNames::MinTime); + add_column_in_correct_order(TimeSeriesColumnNames::MaxTime); + } + + /// Reorder columns for the "metrics" table. + add_column_in_correct_order(TimeSeriesColumnNames::MetricFamilyName); + add_column_in_correct_order(TimeSeriesColumnNames::Type); + add_column_in_correct_order(TimeSeriesColumnNames::Unit); + add_column_in_correct_order(TimeSeriesColumnNames::Help); + + /// All columns from the original list must be added back to the list. + if (!columns_by_name.empty()) + { + throw Exception( + ErrorCodes::INCOMPATIBLE_COLUMNS, + "{}: Column {} can't be used in this table. " + "The TimeSeries table engine supports only a limited set of columns (id, timestamp, value, metric_name, tags, metric_family_name, type, unit, help). " + "Extra columns representing tags must be specified in the 'tags_to_columns' setting.", + time_series_storage_id.getNameForLogs(), columns_by_name.begin()->first); + } +} + + +void TimeSeriesDefinitionNormalizer::addMissingColumns(ASTCreateQuery & create) const +{ + if (!create.as_table.empty()) + { + /// If the create query has the "AS other_table" clause ("CREATE TABLE table AS other_table") + /// then all columns must be extracted from that "other_table". + /// Function InterpreterCreateQuery::getTablePropertiesAndNormalizeCreateQuery() will do that for us, + /// we don't need to fill missing columns by default in that case. + return; + } + + if (!create.columns_list) + create.set(create.columns_list, std::make_shared()); + + if (!create.columns_list->columns) + create.columns_list->set(create.columns_list->columns, std::make_shared()); + auto & columns = create.columns_list->columns->children; + + /// Here in this function we rely on that the columns are already sorted in the canonical order (see the reorderColumns() function). + /// NOTE: The order in which this function processes columns MUST be exactly the same as the order in reorderColumns(). + size_t position = 0; + + auto is_next_column_named = [&](std::string_view column_name) + { + if (position < columns.size() && (typeid_cast(*columns[position]).name == column_name)) + { + ++position; + return true; + } + return false; + }; + + auto make_new_column = [&](const String & column_name, ASTPtr type) + { + auto new_column = std::make_shared(); + new_column->name = column_name; + new_column->type = type; + columns.insert(columns.begin() + position, new_column); + ++position; + }; + + auto get_uuid_type = [] { return makeASTDataType("UUID"); }; + auto get_datetime_type = [] { return makeASTDataType("DateTime64", std::make_shared(3ul)); }; + auto get_float_type = [] { return makeASTDataType("Float64"); }; + auto get_string_type = [] { return makeASTDataType("String"); }; + auto get_lc_string_type = [&] { return makeASTDataType("LowCardinality", get_string_type()); }; + auto get_string_to_string_map_type = [&] { return makeASTDataType("Map", get_string_type(), get_string_type()); }; + auto get_lc_string_to_string_map_type = [&] { return makeASTDataType("Map", get_lc_string_type(), get_string_type()); }; + + auto make_nullable = [&](std::shared_ptr type) + { + if (type->name == "Nullable") + return type; + else + return makeASTDataType("Nullable", type); + }; + + /// Add missing columns for the "data" table. + if (!is_next_column_named(TimeSeriesColumnNames::ID)) + make_new_column(TimeSeriesColumnNames::ID, get_uuid_type()); + + if (!is_next_column_named(TimeSeriesColumnNames::Timestamp)) + make_new_column(TimeSeriesColumnNames::Timestamp, get_datetime_type()); + + auto timestamp_column = typeid_cast>(columns[position - 1]); + auto timestamp_type = typeid_cast>(timestamp_column->type->ptr()); + + if (!is_next_column_named(TimeSeriesColumnNames::Value)) + make_new_column(TimeSeriesColumnNames::Value, get_float_type()); + + /// Add missing columns for the "tags" table. + if (!is_next_column_named(TimeSeriesColumnNames::MetricName)) + { + /// We use 'LowCardinality(String)' as the default type of the `metric_name` column: + /// it looks like a correct optimization because there are shouldn't be too many different metrics. + make_new_column(TimeSeriesColumnNames::MetricName, get_lc_string_type()); + } + + const Map & tags_to_columns = time_series_settings.tags_to_columns; + for (const auto & tag_name_and_column_name : tags_to_columns) + { + const auto & tuple = tag_name_and_column_name.safeGet(); + const auto & column_name = tuple.at(1).safeGet(); + if (!is_next_column_named(column_name)) + make_new_column(column_name, get_string_type()); + } + + if (!is_next_column_named(TimeSeriesColumnNames::Tags)) + { + /// We use 'Map(LowCardinality(String), String)' as the default type of the `tags` column: + /// it looks like a correct optimization because there are shouldn't be too many different tag names. + make_new_column(TimeSeriesColumnNames::Tags, get_lc_string_to_string_map_type()); + } + + if (!is_next_column_named(TimeSeriesColumnNames::AllTags)) + { + /// The `all_tags` column is virtual (it's calculated on the fly and never stored anywhere) + /// so here we don't need to use the LowCardinality optimization as for the `tags` column. + make_new_column(TimeSeriesColumnNames::AllTags, get_string_to_string_map_type()); + } + + if (time_series_settings.store_min_time_and_max_time) + { + /// We use Nullable(DateTime64(3)) as the default type of the `min_time` and `max_time` columns. + /// It's nullable because it allows the aggregation (see aggregate_min_time_and_max_time) work correctly even + /// for rows in the "tags" table which doesn't have `min_time` and `max_time` (because they have no matching rows in the "data" table). + + if (!is_next_column_named(TimeSeriesColumnNames::MinTime)) + make_new_column(TimeSeriesColumnNames::MinTime, make_nullable(timestamp_type)); + if (!is_next_column_named(TimeSeriesColumnNames::MaxTime)) + make_new_column(TimeSeriesColumnNames::MaxTime, make_nullable(timestamp_type)); + } + + /// Add missing columns for the "metrics" table. + if (!is_next_column_named(TimeSeriesColumnNames::MetricFamilyName)) + make_new_column(TimeSeriesColumnNames::MetricFamilyName, get_string_type()); + + if (!is_next_column_named(TimeSeriesColumnNames::Type)) + make_new_column(TimeSeriesColumnNames::Type, get_string_type()); + + if (!is_next_column_named(TimeSeriesColumnNames::Unit)) + make_new_column(TimeSeriesColumnNames::Unit, get_string_type()); + + if (!is_next_column_named(TimeSeriesColumnNames::Help)) + make_new_column(TimeSeriesColumnNames::Help, get_string_type()); + + /// If the following fails that means the order in which columns are processed in this function doesn't match the order of columns in reorderColumns(). + chassert(position == columns.size()); +} + + +void TimeSeriesDefinitionNormalizer::addMissingDefaultForIDColumn(ASTCreateQuery & create) const +{ + /// Find the 'id' column and make a default expression for it. + if (!create.columns_list || !create.columns_list->columns) + return; + + auto & columns = create.columns_list->columns->children; + auto * it = std::find_if(columns.begin(), columns.end(), [](const ASTPtr & column) + { + return typeid_cast(*column).name == TimeSeriesColumnNames::ID; + }); + + if (it == columns.end()) + return; + + auto & column_declaration = typeid_cast(**it); + + /// We add a DEFAULT for the 'id' column only if it's not specified yet. + if (column_declaration.default_specifier.empty() && !column_declaration.default_expression) + { + column_declaration.default_specifier = "DEFAULT"; + column_declaration.default_expression = chooseIDAlgorithm(column_declaration); + } +} + + +ASTPtr TimeSeriesDefinitionNormalizer::chooseIDAlgorithm(const ASTColumnDeclaration & id_column) const +{ + /// Build a list of arguments for a hash function. + /// All hash functions below allow multiple arguments, so we use two arguments: metric_name, all_tags. + ASTs arguments_for_hash_function; + arguments_for_hash_function.push_back(std::make_shared(TimeSeriesColumnNames::MetricName)); + + if (time_series_settings.use_all_tags_column_to_generate_id) + { + arguments_for_hash_function.push_back(std::make_shared(TimeSeriesColumnNames::AllTags)); + } + else + { + const Map & tags_to_columns = time_series_settings.tags_to_columns; + for (const auto & tag_name_and_column_name : tags_to_columns) + { + const auto & tuple = tag_name_and_column_name.safeGet(); + const auto & column_name = tuple.at(1).safeGet(); + arguments_for_hash_function.push_back(std::make_shared(column_name)); + } + arguments_for_hash_function.push_back(std::make_shared(TimeSeriesColumnNames::Tags)); + } + + auto make_hash_function = [&](const String & function_name) + { + auto function = std::make_shared(); + function->name = function_name; + auto arguments_list = std::make_shared(); + arguments_list->children = std::move(arguments_for_hash_function); + function->arguments = arguments_list; + return function; + }; + + /// The type of a hash function depends on the type of the 'id' column. + auto id_type = DataTypeFactory::instance().get(id_column.type); + WhichDataType id_type_which(*id_type); + + if (id_type_which.isUInt64()) + { + return make_hash_function("sipHash64"); + } + else if (id_type_which.isFixedString() && typeid_cast(*id_type).getN() == 16) + { + return make_hash_function("sipHash128"); + } + else if (id_type_which.isUUID()) + { + return makeASTFunction("reinterpretAsUUID", make_hash_function("sipHash128")); + } + else if (id_type_which.isUInt128()) + { + return makeASTFunction("reinterpretAsUInt128", make_hash_function("sipHash128")); + } + else + { + throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS, "{}: The DEFAULT expression for column {} must contain an expression " + "which will be used to calculate the identifier of each time series: {} {} DEFAULT ... " + "If the DEFAULT expression is not specified then it can be chosen implicitly but only if the column type is one of these: UInt64, UInt128, UUID. " + "For type {} the DEFAULT expression can't be chosen automatically, so please specify it explicitly", + time_series_storage_id.getNameForLogs(), id_column.name, id_column.name, id_type->getName(), id_type->getName()); + } +} + + +void TimeSeriesDefinitionNormalizer::addMissingInnerEnginesFromAsTable(ASTCreateQuery & create) const +{ + if (!as_create_query) + return; + + for (auto target_kind : {ViewTarget::Data, ViewTarget::Tags, ViewTarget::Metrics}) + { + if (as_create_query->hasTargetTableID(target_kind)) + { + /// It's unlikely correct to use "CREATE table AS other_table" when "other_table" has external tables like this: + /// CREATE TABLE other_table ENGINE=TimeSeries data mydata + /// (because `table` would use the same table "mydata"). + /// Thus we just prohibit that. + QualifiedTableName as_table{as_create_query->getDatabase(), as_create_query->getTable()}; + throw Exception( + ErrorCodes::INCORRECT_QUERY, + "Cannot CREATE a table AS {}.{} because it has external tables", + backQuoteIfNeed(as_table.database), backQuoteIfNeed(as_table.table)); + } + + auto inner_table_engine = create.getTargetInnerEngine(target_kind); + if (!inner_table_engine) + { + /// Copy an inner engine's definition from the other table. + inner_table_engine = as_create_query->getTargetInnerEngine(target_kind); + if (inner_table_engine) + create.setTargetInnerEngine(target_kind, typeid_cast>(inner_table_engine->clone())); + } + } +} + + +void TimeSeriesDefinitionNormalizer::addMissingInnerEngines(ASTCreateQuery & create) const +{ + for (auto target_kind : {ViewTarget::Data, ViewTarget::Tags, ViewTarget::Metrics}) + { + if (create.hasTargetTableID(target_kind)) + continue; /// External target is set, inner engine is not needed. + + auto inner_table_engine = create.getTargetInnerEngine(target_kind); + if (inner_table_engine && inner_table_engine->engine) + continue; /// Engine is set already, skip it. + + if (!inner_table_engine) + { + /// Some part of storage definition (such as PARTITION BY) is specified, but the inner ENGINE is not: just set default one. + inner_table_engine = std::make_shared(); + create.setTargetInnerEngine(target_kind, inner_table_engine); + } + + /// Set engine by default. + setInnerEngineByDefault(target_kind, *inner_table_engine); + } +} + + +void TimeSeriesDefinitionNormalizer::setInnerEngineByDefault(ViewTarget::Kind inner_table_kind, ASTStorage & inner_storage_def) const +{ + switch (inner_table_kind) + { + case ViewTarget::Data: + { + inner_storage_def.set(inner_storage_def.engine, makeASTFunction("MergeTree")); + inner_storage_def.engine->no_empty_args = false; + + if (!inner_storage_def.order_by && !inner_storage_def.primary_key && inner_storage_def.engine->name.ends_with("MergeTree")) + { + inner_storage_def.set(inner_storage_def.order_by, + makeASTFunction("tuple", + std::make_shared(TimeSeriesColumnNames::ID), + std::make_shared(TimeSeriesColumnNames::Timestamp))); + } + break; + } + + case ViewTarget::Tags: + { + String engine_name; + if (time_series_settings.aggregate_min_time_and_max_time) + engine_name = "AggregatingMergeTree"; + else + engine_name = "ReplacingMergeTree"; + + inner_storage_def.set(inner_storage_def.engine, makeASTFunction(engine_name)); + inner_storage_def.engine->no_empty_args = false; + + if (!inner_storage_def.order_by && !inner_storage_def.primary_key && inner_storage_def.engine->name.ends_with("MergeTree")) + { + inner_storage_def.set(inner_storage_def.primary_key, + std::make_shared(TimeSeriesColumnNames::MetricName)); + + ASTs order_by_list; + order_by_list.push_back(std::make_shared(TimeSeriesColumnNames::MetricName)); + order_by_list.push_back(std::make_shared(TimeSeriesColumnNames::ID)); + + if (time_series_settings.store_min_time_and_max_time && !time_series_settings.aggregate_min_time_and_max_time) + { + order_by_list.push_back(std::make_shared(TimeSeriesColumnNames::MinTime)); + order_by_list.push_back(std::make_shared(TimeSeriesColumnNames::MaxTime)); + } + + auto order_by_tuple = std::make_shared(); + order_by_tuple->name = "tuple"; + auto arguments_list = std::make_shared(); + arguments_list->children = std::move(order_by_list); + order_by_tuple->arguments = arguments_list; + inner_storage_def.set(inner_storage_def.order_by, order_by_tuple); + } + break; + } + + case ViewTarget::Metrics: + { + inner_storage_def.set(inner_storage_def.engine, makeASTFunction("ReplacingMergeTree")); + inner_storage_def.engine->no_empty_args = false; + + if (!inner_storage_def.order_by && !inner_storage_def.primary_key && inner_storage_def.engine->name.ends_with("MergeTree")) + { + inner_storage_def.set(inner_storage_def.order_by, std::make_shared(TimeSeriesColumnNames::MetricFamilyName)); + } + break; + } + + default: + UNREACHABLE(); /// This function must not be called with any other `kind`. + } +} + +} diff --git a/src/Storages/TimeSeries/TimeSeriesDefinitionNormalizer.h b/src/Storages/TimeSeries/TimeSeriesDefinitionNormalizer.h new file mode 100644 index 00000000000..1f959eb3ce0 --- /dev/null +++ b/src/Storages/TimeSeries/TimeSeriesDefinitionNormalizer.h @@ -0,0 +1,55 @@ +#pragma once + +#include +#include + + +namespace DB +{ +class ASTColumnDeclaration; +class ASTCreateQuery; +struct ColumnDescription; +struct TimeSeriesSettings; + +/// Normalizes a TimeSeries table definition. +class TimeSeriesDefinitionNormalizer +{ +public: + /// Constructor stores a reference to argument `time_series_settings_` (it's unnecessary to copy it). + TimeSeriesDefinitionNormalizer(StorageID time_series_storage_id_, + std::reference_wrapper time_series_settings_, + const ASTCreateQuery * as_create_query_); + + /// Adds missing columns to the definition and reorders all the columns in the canonical way. + /// Also adds engines of inner tables to the definition if they aren't specified yet. + /// The `as_table_create_query` parameter must be nullptr if it isn't a "CREATE AS query". + void normalize(ASTCreateQuery & create_query) const; + +private: + /// Reorders existing columns in the canonical way. + void reorderColumns(ASTCreateQuery & create) const; + + /// Adds missing columns with data types set by default.. + void addMissingColumns(ASTCreateQuery & create) const; + + /// Adds the DEFAULT expression for the 'id' column if it isn't specified yet. + void addMissingDefaultForIDColumn(ASTCreateQuery & create) const; + + /// Generates a formulae for calculating the identifier of a time series from the metric name and all the tags. + ASTPtr chooseIDAlgorithm(const ASTColumnDeclaration & id_column) const; + + /// Copies the definitions of inner engines from "CREATE AS
" if this is that kind of query. + void addMissingInnerEnginesFromAsTable(ASTCreateQuery & create) const; + + /// Adds engines of inner tables to the definition if they aren't specified yet. + void addMissingInnerEngines(ASTCreateQuery & create) const; + + /// Sets the engine of an inner table by default. + void setInnerEngineByDefault(ViewTarget::Kind inner_table_kind, ASTStorage & inner_storage_def) const; + + const StorageID time_series_storage_id; + const TimeSeriesSettings & time_series_settings; + const ASTCreateQuery * as_create_query = nullptr; +}; + +} diff --git a/src/Storages/TimeSeries/TimeSeriesInnerTablesCreator.cpp b/src/Storages/TimeSeries/TimeSeriesInnerTablesCreator.cpp new file mode 100644 index 00000000000..5f616982a6f --- /dev/null +++ b/src/Storages/TimeSeries/TimeSeriesInnerTablesCreator.cpp @@ -0,0 +1,191 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +TimeSeriesInnerTablesCreator::TimeSeriesInnerTablesCreator(ContextPtr context_, + StorageID time_series_storage_id_, + std::reference_wrapper time_series_columns_, + std::reference_wrapper time_series_settings_) + : WithContext(context_) + , time_series_storage_id(std::move(time_series_storage_id_)) + , time_series_columns(time_series_columns_) + , time_series_settings(time_series_settings_) +{ +} + +TimeSeriesInnerTablesCreator::~TimeSeriesInnerTablesCreator() = default; + + +ColumnsDescription TimeSeriesInnerTablesCreator::getInnerTableColumnsDescription(ViewTarget::Kind inner_table_kind) const +{ + ColumnsDescription columns; + + switch (inner_table_kind) + { + case ViewTarget::Data: + { + /// Column "id". + { + auto id_column = time_series_columns.get(TimeSeriesColumnNames::ID); + /// The expression for calculating the identifier of a time series can be transferred only to the "tags" inner table + /// (because it usually depends on columns like "metric_name" or "all_tags"). + id_column.default_desc = {}; + columns.add(std::move(id_column)); + } + + /// Column "timestamp". + columns.add(time_series_columns.get(TimeSeriesColumnNames::Timestamp)); + + /// Column "value". + columns.add(time_series_columns.get(TimeSeriesColumnNames::Value)); + break; + } + + case ViewTarget::Tags: + { + /// Column "id". + columns.add(time_series_columns.get(TimeSeriesColumnNames::ID)); + + /// Column "metric_name". + columns.add(time_series_columns.get(TimeSeriesColumnNames::MetricName)); + + /// Columns corresponding to specific tags specified in the "tags_to_columns" setting. + const Map & tags_to_columns = time_series_settings.tags_to_columns; + for (const auto & tag_name_and_column_name : tags_to_columns) + { + const auto & tuple = tag_name_and_column_name.safeGet(); + const auto & column_name = tuple.at(1).safeGet(); + columns.add(time_series_columns.get(column_name)); + } + + /// Column "tags". + columns.add(time_series_columns.get(TimeSeriesColumnNames::Tags)); + + /// Column "all_tags". + if (time_series_settings.use_all_tags_column_to_generate_id) + { + ColumnDescription all_tags_column = time_series_columns.get(TimeSeriesColumnNames::AllTags); + /// Column "all_tags" is here only to calculate the identifier of a time series for the "id" column, so it can be ephemeral. + all_tags_column.default_desc.kind = ColumnDefaultKind::Ephemeral; + if (!all_tags_column.default_desc.expression) + { + all_tags_column.default_desc.ephemeral_default = true; + all_tags_column.default_desc.expression = makeASTFunction("defaultValueOfTypeName", std::make_shared(all_tags_column.type->getName())); + } + columns.add(std::move(all_tags_column)); + } + + /// Columns "min_time" and "max_time". + if (time_series_settings.store_min_time_and_max_time) + { + auto min_time_column = time_series_columns.get(TimeSeriesColumnNames::MinTime); + auto max_time_column = time_series_columns.get(TimeSeriesColumnNames::MaxTime); + if (time_series_settings.aggregate_min_time_and_max_time) + { + AggregateFunctionProperties properties; + auto min_function = AggregateFunctionFactory::instance().get("min", NullsAction::EMPTY, {min_time_column.type}, {}, properties); + auto custom_name = std::make_unique(min_function, DataTypes{min_time_column.type}, Array{}); + min_time_column.type = DataTypeFactory::instance().getCustom(std::make_unique(std::move(custom_name))); + + auto max_function = AggregateFunctionFactory::instance().get("max", NullsAction::EMPTY, {max_time_column.type}, {}, properties); + custom_name = std::make_unique(max_function, DataTypes{max_time_column.type}, Array{}); + max_time_column.type = DataTypeFactory::instance().getCustom(std::make_unique(std::move(custom_name))); + } + columns.add(std::move(min_time_column)); + columns.add(std::move(max_time_column)); + } + + break; + } + + case ViewTarget::Metrics: + { + columns.add(time_series_columns.get(TimeSeriesColumnNames::MetricFamilyName)); + columns.add(time_series_columns.get(TimeSeriesColumnNames::Type)); + columns.add(time_series_columns.get(TimeSeriesColumnNames::Unit)); + columns.add(time_series_columns.get(TimeSeriesColumnNames::Help)); + break; + } + + default: + UNREACHABLE(); + } + + return columns; +} + + +StorageID TimeSeriesInnerTablesCreator::getInnerTableID(ViewTarget::Kind inner_table_kind, const UUID & inner_table_uuid) const +{ + StorageID res = time_series_storage_id; + if (time_series_storage_id.hasUUID()) + res.table_name = fmt::format(".inner_id.{}.{}", toString(inner_table_kind), time_series_storage_id.uuid); + else + res.table_name = fmt::format(".inner.{}.{}", toString(inner_table_kind), time_series_storage_id.table_name); + res.uuid = inner_table_uuid; + return res; +} + + +std::shared_ptr TimeSeriesInnerTablesCreator::getInnerTableCreateQuery( + ViewTarget::Kind inner_table_kind, + const UUID & inner_table_uuid, + const std::shared_ptr & inner_storage_def) const +{ + auto manual_create_query = std::make_shared(); + + auto inner_table_id = getInnerTableID(inner_table_kind, inner_table_uuid); + manual_create_query->setDatabase(inner_table_id.database_name); + manual_create_query->setTable(inner_table_id.table_name); + manual_create_query->uuid = inner_table_id.uuid; + manual_create_query->has_uuid = inner_table_id.uuid != UUIDHelpers::Nil; + + auto new_columns_list = std::make_shared(); + new_columns_list->set(new_columns_list->columns, InterpreterCreateQuery::formatColumns(getInnerTableColumnsDescription(inner_table_kind))); + manual_create_query->set(manual_create_query->columns_list, new_columns_list); + + if (inner_storage_def) + manual_create_query->set(manual_create_query->storage, inner_storage_def->clone()); + + return manual_create_query; +} + +StorageID TimeSeriesInnerTablesCreator::createInnerTable( + ViewTarget::Kind inner_table_kind, + const UUID & inner_table_uuid, + const std::shared_ptr & inner_storage_def) const +{ + /// We will make a query to create the inner target table. + auto create_context = Context::createCopy(getContext()); + + auto manual_create_query = getInnerTableCreateQuery(inner_table_kind, inner_table_uuid, inner_storage_def); + + /// Create the inner target table. + InterpreterCreateQuery create_interpreter(manual_create_query, create_context); + create_interpreter.setInternal(true); + create_interpreter.execute(); + + return DatabaseCatalog::instance().getTable({manual_create_query->getDatabase(), manual_create_query->getTable()}, getContext())->getStorageID(); +} + +} diff --git a/src/Storages/TimeSeries/TimeSeriesInnerTablesCreator.h b/src/Storages/TimeSeries/TimeSeriesInnerTablesCreator.h new file mode 100644 index 00000000000..5778dd77398 --- /dev/null +++ b/src/Storages/TimeSeries/TimeSeriesInnerTablesCreator.h @@ -0,0 +1,47 @@ +#pragma once + +#include +#include + + +namespace DB +{ +class ASTCreateQuery; +class ColumnsDescription; +struct TimeSeriesSettings; + +/// Generates inner tables for the TimeSeries table engine. +class TimeSeriesInnerTablesCreator : public WithContext +{ +public: + /// Constructor stores references to arguments `time_series_columns_` and `time_series_settings_` (it's unnecessary to copy them). + TimeSeriesInnerTablesCreator(ContextPtr context_, + StorageID time_series_storage_id_, + std::reference_wrapper time_series_columns_, + std::reference_wrapper time_series_settings_); + + ~TimeSeriesInnerTablesCreator(); + + /// Returns a column description of an inner table. + ColumnsDescription getInnerTableColumnsDescription(ViewTarget::Kind inner_table_kind) const; + + /// Returns a StorageID of an inner table. + StorageID getInnerTableID(ViewTarget::Kind inner_table_kind, const UUID & inner_table_uuid) const; + + /// Generates a CREATE TABLE query for an inner table. + std::shared_ptr getInnerTableCreateQuery(ViewTarget::Kind inner_table_kind, + const UUID & inner_table_uuid, + const std::shared_ptr & inner_storage_def) const; + + /// Creates an inner table. + StorageID createInnerTable(ViewTarget::Kind inner_table_kind, + const UUID & inner_table_uuid, + const std::shared_ptr & inner_storage_def) const; + +private: + const StorageID time_series_storage_id; + const ColumnsDescription & time_series_columns; + const TimeSeriesSettings & time_series_settings; +}; + +} diff --git a/src/Storages/TimeSeries/TimeSeriesSettings.cpp b/src/Storages/TimeSeries/TimeSeriesSettings.cpp new file mode 100644 index 00000000000..3a15be59191 --- /dev/null +++ b/src/Storages/TimeSeries/TimeSeriesSettings.cpp @@ -0,0 +1,34 @@ +#include + +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNKNOWN_SETTING; +} + +IMPLEMENT_SETTINGS_TRAITS(TimeSeriesSettingsTraits, LIST_OF_TIME_SERIES_SETTINGS) + +void TimeSeriesSettings::loadFromQuery(ASTStorage & storage_def) +{ + if (storage_def.settings) + { + try + { + applyChanges(storage_def.settings->changes); + } + catch (Exception & e) + { + if (e.code() == ErrorCodes::UNKNOWN_SETTING) + e.addMessage("for storage " + storage_def.engine->name); + throw; + } + } +} + +} diff --git a/src/Storages/TimeSeries/TimeSeriesSettings.h b/src/Storages/TimeSeries/TimeSeriesSettings.h new file mode 100644 index 00000000000..4dc6a436cd0 --- /dev/null +++ b/src/Storages/TimeSeries/TimeSeriesSettings.h @@ -0,0 +1,29 @@ +#pragma once + +#include + + +namespace DB +{ +class ASTStorage; + +#define LIST_OF_TIME_SERIES_SETTINGS(M, ALIAS) \ + M(Map, tags_to_columns, Map{}, "Map specifying which tags should be put to separate columns of the 'tags' table. Syntax: {'tag1': 'column1', 'tag2' : column2, ...}", 0) \ + M(Bool, use_all_tags_column_to_generate_id, true, "When generating an expression to calculate an identifier of a time series, this flag enables using the 'all_tags' column in that calculation. The 'all_tags' is a virtual column containing all tags except the metric name", 0) \ + M(Bool, store_min_time_and_max_time, true, "If set to true then the table will store 'min_time' and 'max_time' for each time series", 0) \ + M(Bool, aggregate_min_time_and_max_time, true, "When creating an inner target 'tags' table, this flag enables using 'SimpleAggregateFunction(min, Nullable(DateTime64(3)))' instead of just 'Nullable(DateTime64(3))' as the type of the 'min_time' column, and the same for the 'max_time' column", 0) \ + M(Bool, filter_by_min_time_and_max_time, true, "If set to true then the table will use the 'min_time' and 'max_time' columns for filtering time series", 0) \ + +DECLARE_SETTINGS_TRAITS(TimeSeriesSettingsTraits, LIST_OF_TIME_SERIES_SETTINGS) + +/// Settings for the TimeSeries table engine. +/// Could be loaded from a CREATE TABLE query (SETTINGS clause). For example: +/// CREATE TABLE mytable ENGINE = TimeSeries() SETTINGS tags_to_columns = {'job':'job', 'instance':'instance'} DATA ENGINE = ReplicatedMergeTree('zkpath', 'replica'), ... +struct TimeSeriesSettings : public BaseSettings +{ + void loadFromQuery(ASTStorage & storage_def); +}; + +using TimeSeriesSettingsPtr = std::shared_ptr; + +} diff --git a/src/Storages/TimeSeries/TimeSeriesTagNames.h b/src/Storages/TimeSeries/TimeSeriesTagNames.h new file mode 100644 index 00000000000..23b005ed414 --- /dev/null +++ b/src/Storages/TimeSeries/TimeSeriesTagNames.h @@ -0,0 +1,13 @@ +#pragma once + + +namespace DB +{ + +/// Label names with special meaning. +struct TimeSeriesTagNames +{ + static constexpr const char * MetricName = "__name__"; +}; + +} diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 19694830c4e..f0d276e4e56 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -1,4 +1,3 @@ -#include #include #include #include @@ -37,11 +36,17 @@ #include #include +#include #include +#include +#include +#include +#include #include "Functions/FunctionsLogical.h" #include "Functions/IFunction.h" #include "Functions/IFunctionAdaptors.h" #include "Functions/indexHint.h" +#include #include #include #include @@ -51,6 +56,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int INCORRECT_DATA; +} + namespace VirtualColumnUtils { @@ -119,14 +129,45 @@ NameSet getVirtualNamesForFileLikeStorage() return {"_path", "_file", "_size", "_time", "_etag"}; } -VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription & storage_columns) +std::unordered_map parseHivePartitioningKeysAndValues(const String & path) +{ + std::string pattern = "([^/]+)=([^/]+)/"; + re2::StringPiece input_piece(path); + + std::unordered_map key_values; + std::string key, value; + std::unordered_map used_keys; + while (RE2::FindAndConsume(&input_piece, pattern, &key, &value)) + { + auto it = used_keys.find(key); + if (it != used_keys.end() && it->second != value) + throw Exception(ErrorCodes::INCORRECT_DATA, "Path '{}' to file with enabled hive-style partitioning contains duplicated partition key {} with different values, only unique keys are allowed", path, key); + used_keys.insert({key, value}); + + auto col_name = key; + key_values[col_name] = value; + } + return key_values; +} + +VirtualColumnsDescription getVirtualsForFileLikeStorage(ColumnsDescription & storage_columns, const ContextPtr & context, const std::string & path, std::optional format_settings_) { VirtualColumnsDescription desc; auto add_virtual = [&](const auto & name, const auto & type) { if (storage_columns.has(name)) + { + if (!context->getSettingsRef().use_hive_partitioning) + return; + + if (storage_columns.size() == 1) + throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot use hive partitioning for file {}: it contains only partition columns. Disable use_hive_partitioning setting to read this file", path); + auto local_type = storage_columns.get(name).type; + storage_columns.remove(name); + desc.addEphemeral(name, local_type, ""); return; + } desc.addEphemeral(name, type, ""); }; @@ -137,6 +178,22 @@ VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription add_virtual("_time", makeNullable(std::make_shared())); add_virtual("_etag", std::make_shared(std::make_shared())); + if (context->getSettingsRef().use_hive_partitioning) + { + auto map = parseHivePartitioningKeysAndValues(path); + auto format_settings = format_settings_ ? *format_settings_ : getFormatSettings(context); + for (auto & item : map) + { + auto type = tryInferDataTypeByEscapingRule(item.second, format_settings, FormatSettings::EscapingRule::Raw); + if (type == nullptr) + type = std::make_shared(); + if (type->canBeInsideLowCardinality()) + add_virtual(item.first, std::make_shared(type)); + else + add_virtual(item.first, type); + } + } + return desc; } @@ -196,8 +253,12 @@ ColumnPtr getFilterByPathAndFileIndexes(const std::vector & paths, const void addRequestedFileLikeStorageVirtualsToChunk( Chunk & chunk, const NamesAndTypesList & requested_virtual_columns, - VirtualsForFileLikeStorage virtual_values) + VirtualsForFileLikeStorage virtual_values, ContextPtr context) { + std::unordered_map hive_map; + if (context->getSettingsRef().use_hive_partitioning) + hive_map = parseHivePartitioningKeysAndValues(virtual_values.path); + for (const auto & virtual_column : requested_virtual_columns) { if (virtual_column.name == "_path") @@ -231,6 +292,10 @@ void addRequestedFileLikeStorageVirtualsToChunk( else chunk.addColumn(virtual_column.type->createColumnConstWithDefaultValue(chunk.getNumRows())->convertToFullColumnIfConst()); } + else if (auto it = hive_map.find(virtual_column.getNameInStorage()); it != hive_map.end()) + { + chunk.addColumn(virtual_column.type->createColumnConst(chunk.getNumRows(), convertFieldToType(Field(it->second), *virtual_column.type))->convertToFullColumnIfConst()); + } else if (virtual_column.name == "_etag") { if (virtual_values.etag) @@ -281,9 +346,7 @@ bool isDeterministicInScopeOfQuery(const ActionsDAG::Node * node) } static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( - const ActionsDAG::Node * node, - const Block * allowed_inputs, - ActionsDAG::Nodes & additional_nodes) + const ActionsDAG::Node * node, const Block * allowed_inputs, ActionsDAG::Nodes & additional_nodes, bool allow_partial_result) { if (node->type == ActionsDAG::ActionType::FUNCTION) { @@ -292,8 +355,15 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( auto & node_copy = additional_nodes.emplace_back(*node); node_copy.children.clear(); for (const auto * child : node->children) - if (const auto * child_copy = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes)) + if (const auto * child_copy + = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes, allow_partial_result)) node_copy.children.push_back(child_copy); + /// Expression like (now_allowed AND allowed) is not allowed if allow_partial_result = true. This is important for + /// trivial count optimization, otherwise we can get incorrect results. For example, if the query is + /// SELECT count() FROM table WHERE _partition_id = '0' AND rowNumberInBlock() = 1, we cannot apply + /// trivial count. + else if (!allow_partial_result) + return nullptr; if (node_copy.children.empty()) return nullptr; @@ -301,7 +371,7 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( if (node_copy.children.size() == 1) { const ActionsDAG::Node * res = node_copy.children.front(); - /// Expression like (not_allowed AND 256) can't be resuced to (and(256)) because AND requires + /// Expression like (not_allowed AND 256) can't be reduced to (and(256)) because AND requires /// at least two arguments; also it can't be reduced to (256) because result type is different. if (!res->result_type->equals(*node->result_type)) { @@ -319,7 +389,7 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( { auto & node_copy = additional_nodes.emplace_back(*node); for (auto & child : node_copy.children) - if (child = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes); !child) + if (child = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes, allow_partial_result); !child) return nullptr; return &node_copy; @@ -333,7 +403,8 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( auto index_hint_dag = index_hint->getActions().clone(); ActionsDAG::NodeRawConstPtrs atoms; for (const auto & output : index_hint_dag.getOutputs()) - if (const auto * child_copy = splitFilterNodeForAllowedInputs(output, allowed_inputs, additional_nodes)) + if (const auto * child_copy + = splitFilterNodeForAllowedInputs(output, allowed_inputs, additional_nodes, allow_partial_result)) atoms.push_back(child_copy); if (!atoms.empty()) @@ -367,22 +438,24 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( return node; } -std::optional splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs) +std::optional +splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs, bool allow_partial_result) { if (!predicate) return {}; ActionsDAG::Nodes additional_nodes; - const auto * res = splitFilterNodeForAllowedInputs(predicate, allowed_inputs, additional_nodes); + const auto * res = splitFilterNodeForAllowedInputs(predicate, allowed_inputs, additional_nodes, allow_partial_result); if (!res) return {}; return ActionsDAG::cloneSubDAG({res}, true); } -void filterBlockWithPredicate(const ActionsDAG::Node * predicate, Block & block, ContextPtr context) +void filterBlockWithPredicate( + const ActionsDAG::Node * predicate, Block & block, ContextPtr context, bool allow_filtering_with_partial_predicate) { - auto dag = splitFilterDagForAllowedInputs(predicate, &block); + auto dag = splitFilterDagForAllowedInputs(predicate, &block, /*allow_partial_result=*/allow_filtering_with_partial_predicate); if (dag) filterBlockWithExpression(buildFilterExpression(std::move(*dag), context), block); } diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h index 1ed369300ff..6aa08b2aef2 100644 --- a/src/Storages/VirtualColumnUtils.h +++ b/src/Storages/VirtualColumnUtils.h @@ -5,6 +5,7 @@ #include #include #include +#include #include @@ -26,9 +27,13 @@ namespace VirtualColumnUtils /// /// Otherwise calling filter*() outside applyFilters() will throw "Not-ready Set is passed" /// if there are subqueries. +/// +/// Similar to filterBlockWithExpression(buildFilterExpression(splitFilterDagForAllowedInputs(...)))./// Similar to filterBlockWithQuery, but uses ActionsDAG as a predicate. +/// Basically it is filterBlockWithDAG(splitFilterDagForAllowedInputs). +/// If allow_filtering_with_partial_predicate is true, then the filtering will be done even if some part of the predicate +/// cannot be evaluated using the columns from the block. +void filterBlockWithPredicate(const ActionsDAG::Node * predicate, Block & block, ContextPtr context, bool allow_filtering_with_partial_predicate = true); -/// Similar to filterBlockWithExpression(buildFilterExpression(splitFilterDagForAllowedInputs(...))). -void filterBlockWithPredicate(const ActionsDAG::Node * predicate, Block & block, ContextPtr context); /// Just filters block. Block should contain all the required columns. ExpressionActionsPtr buildFilterExpression(ActionsDAG dag, ContextPtr context); @@ -41,7 +46,15 @@ void buildSetsForDAG(const ActionsDAG & dag, const ContextPtr & context); bool isDeterministicInScopeOfQuery(const ActionsDAG::Node * node); /// Extract a part of predicate that can be evaluated using only columns from input_names. -std::optional splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs); +/// When allow_partial_result is false, then the result will be empty if any part of if cannot be evaluated deterministically +/// on the given inputs. +/// allow_partial_result must be false when we are going to use the result to filter parts in +/// MergeTreeData::totalRowsByPartitionPredicateImp. For example, if the query is +/// `SELECT count() FROM table WHERE _partition_id = '0' AND rowNumberInBlock() = 1` +/// The predicate will be `_partition_id = '0' AND rowNumberInBlock() = 1`, and `rowNumberInBlock()` is +/// non-deterministic. If we still extract the part `_partition_id = '0'` for filtering parts, then trivial +/// count optimization will be mistakenly applied to the query. +std::optional splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs, bool allow_partial_result = true); /// Extract from the input stream a set of `name` column values template @@ -51,12 +64,16 @@ auto extractSingleValueFromBlock(const Block & block, const String & name) const ColumnWithTypeAndName & data = block.getByName(name); size_t rows = block.rows(); for (size_t i = 0; i < rows; ++i) - res.insert((*data.column)[i].get()); + res.insert((*data.column)[i].safeGet()); return res; } NameSet getVirtualNamesForFileLikeStorage(); -VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription & storage_columns); +VirtualColumnsDescription getVirtualsForFileLikeStorage( + ColumnsDescription & storage_columns, + const ContextPtr & context, + const std::string & sample_path = "", + std::optional format_settings_ = std::nullopt); std::optional createPathAndFileFilterDAG(const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns); @@ -88,7 +105,7 @@ struct VirtualsForFileLikeStorage void addRequestedFileLikeStorageVirtualsToChunk( Chunk & chunk, const NamesAndTypesList & requested_virtual_columns, - VirtualsForFileLikeStorage virtual_values); + VirtualsForFileLikeStorage virtual_values, ContextPtr context); } } diff --git a/src/Storages/fuzzers/CMakeLists.txt b/src/Storages/fuzzers/CMakeLists.txt index ec56b853666..f67552716a2 100644 --- a/src/Storages/fuzzers/CMakeLists.txt +++ b/src/Storages/fuzzers/CMakeLists.txt @@ -4,4 +4,4 @@ clickhouse_add_executable (mergetree_checksum_fuzzer mergetree_checksum_fuzzer.c target_link_libraries (mergetree_checksum_fuzzer PRIVATE dbms clickhouse_functions) clickhouse_add_executable (columns_description_fuzzer columns_description_fuzzer.cpp) -target_link_libraries (columns_description_fuzzer PRIVATE dbms clickhouse_functions) +target_link_libraries (columns_description_fuzzer PRIVATE clickhouse_functions) diff --git a/src/Storages/fuzzers/columns_description_fuzzer.cpp b/src/Storages/fuzzers/columns_description_fuzzer.cpp index 469bfe2fb38..e39afccd1f9 100644 --- a/src/Storages/fuzzers/columns_description_fuzzer.cpp +++ b/src/Storages/fuzzers/columns_description_fuzzer.cpp @@ -1,4 +1,5 @@ #include +#include #include diff --git a/src/Storages/getStructureOfRemoteTable.cpp b/src/Storages/getStructureOfRemoteTable.cpp index 9d23f132759..1408e120bc5 100644 --- a/src/Storages/getStructureOfRemoteTable.cpp +++ b/src/Storages/getStructureOfRemoteTable.cpp @@ -102,16 +102,16 @@ ColumnsDescription getStructureOfRemoteTableInShard( { ColumnDescription column; - column.name = (*name)[i].get(); + column.name = (*name)[i].safeGet(); - String data_type_name = (*type)[i].get(); + String data_type_name = (*type)[i].safeGet(); column.type = data_type_factory.get(data_type_name); - String kind_name = (*default_kind)[i].get(); + String kind_name = (*default_kind)[i].safeGet(); if (!kind_name.empty()) { column.default_desc.kind = columnDefaultKindFromString(kind_name); - String expr_str = (*default_expr)[i].get(); + String expr_str = (*default_expr)[i].safeGet(); column.default_desc.expression = parseQuery( expr_parser, expr_str.data(), expr_str.data() + expr_str.size(), "default expression", 0, settings.max_parser_depth, settings.max_parser_backtracks); @@ -207,8 +207,8 @@ ColumnsDescriptionByShardNum getExtendedObjectsOfRemoteTables( size_t size = name_col.size(); for (size_t i = 0; i < size; ++i) { - auto name = name_col[i].get(); - auto type_name = type_col[i].get(); + auto name = name_col[i].safeGet(); + auto type_name = type_col[i].safeGet(); auto storage_column = storage_columns.tryGetPhysical(name); if (storage_column && storage_column->type->hasDynamicSubcolumnsDeprecated()) diff --git a/src/Storages/registerStorages.cpp b/src/Storages/registerStorages.cpp index 8f33314397c..4ed74763810 100644 --- a/src/Storages/registerStorages.cpp +++ b/src/Storages/registerStorages.cpp @@ -26,6 +26,9 @@ void registerStorageGenerateRandom(StorageFactory & factory); void registerStorageExecutable(StorageFactory & factory); void registerStorageWindowView(StorageFactory & factory); void registerStorageLoop(StorageFactory & factory); +void registerStorageFuzzQuery(StorageFactory & factory); +void registerStorageTimeSeries(StorageFactory & factory); + #if USE_RAPIDJSON || USE_SIMDJSON void registerStorageFuzzJSON(StorageFactory & factory); #endif @@ -126,6 +129,9 @@ void registerStorages() registerStorageExecutable(factory); registerStorageWindowView(factory); registerStorageLoop(factory); + registerStorageFuzzQuery(factory); + registerStorageTimeSeries(factory); + #if USE_RAPIDJSON || USE_SIMDJSON registerStorageFuzzJSON(factory); #endif diff --git a/src/Storages/tests/gtest_transform_query_for_external_database.cpp b/src/Storages/tests/gtest_transform_query_for_external_database.cpp index 6765e112bb9..5a63c118e2d 100644 --- a/src/Storages/tests/gtest_transform_query_for_external_database.cpp +++ b/src/Storages/tests/gtest_transform_query_for_external_database.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -74,6 +75,7 @@ private: {"a", std::make_shared()}, {"b", std::make_shared()}, {"foo", std::make_shared()}, + {"is_value", DataTypeFactory::instance().get("Bool")}, }), TableWithColumnNamesAndTypes( createDBAndTable("table2"), @@ -411,6 +413,14 @@ TEST(TransformQueryForExternalDatabase, Analyzer) R"(SELECT "column" FROM "test"."table")"); check(state, 1, {"column", "apply_id", "apply_type", "apply_status", "create_time", "field", "value", "a", "b", "foo"}, - "SELECT * FROM table WHERE (column) IN (1)", + "SELECT * EXCEPT (is_value) FROM table WHERE (column) IN (1)", R"(SELECT "column", "apply_id", "apply_type", "apply_status", "create_time", "field", "value", "a", "b", "foo" FROM "test"."table" WHERE "column" IN (1))"); + + check(state, 1, {"is_value"}, + "SELECT is_value FROM table WHERE is_value = true", + R"(SELECT "is_value" FROM "test"."table" WHERE "is_value" = true)"); + + check(state, 1, {"is_value"}, + "SELECT is_value FROM table WHERE is_value = 1", + R"(SELECT "is_value" FROM "test"."table" WHERE "is_value" = 1)"); } diff --git a/src/TableFunctions/TableFunctionExplain.cpp b/src/TableFunctions/TableFunctionExplain.cpp index 552b9fde986..69d24c879bd 100644 --- a/src/TableFunctions/TableFunctionExplain.cpp +++ b/src/TableFunctions/TableFunctionExplain.cpp @@ -83,7 +83,7 @@ void TableFunctionExplain::parseArguments(const ASTPtr & ast_function, ContextPt "Table function '{}' requires a String argument for EXPLAIN kind, got '{}'", getName(), queryToString(kind_arg)); - ASTExplainQuery::ExplainKind kind = ASTExplainQuery::fromString(kind_literal->value.get()); + ASTExplainQuery::ExplainKind kind = ASTExplainQuery::fromString(kind_literal->value.safeGet()); auto explain_query = std::make_shared(kind); const auto * settings_arg = function->arguments->children[1]->as(); @@ -92,7 +92,7 @@ void TableFunctionExplain::parseArguments(const ASTPtr & ast_function, ContextPt "Table function '{}' requires a serialized string settings argument, got '{}'", getName(), queryToString(function->arguments->children[1])); - const auto & settings_str = settings_arg->value.get(); + const auto & settings_str = settings_arg->value.safeGet(); if (!settings_str.empty()) { const Settings & settings = context->getSettingsRef(); diff --git a/src/TableFunctions/TableFunctionFile.cpp b/src/TableFunctions/TableFunctionFile.cpp index af327cfe54e..5cd249f000d 100644 --- a/src/TableFunctions/TableFunctionFile.cpp +++ b/src/TableFunctions/TableFunctionFile.cpp @@ -48,7 +48,7 @@ void TableFunctionFile::parseFirstArguments(const ASTPtr & arg, const ContextPtr else if (type == Field::Types::Int64 || type == Field::Types::UInt64) { fd = static_cast( - (type == Field::Types::Int64) ? literal->value.get() : literal->value.get()); + (type == Field::Types::Int64) ? literal->value.safeGet() : literal->value.safeGet()); if (fd < 0) throw Exception(ErrorCodes::BAD_ARGUMENTS, "File descriptor must be non-negative"); } diff --git a/src/TableFunctions/TableFunctionFuzzQuery.cpp b/src/TableFunctions/TableFunctionFuzzQuery.cpp new file mode 100644 index 00000000000..224f6666556 --- /dev/null +++ b/src/TableFunctions/TableFunctionFuzzQuery.cpp @@ -0,0 +1,54 @@ +#include + +#include +#include +#include +#include + +namespace DB +{ + + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +void TableFunctionFuzzQuery::parseArguments(const ASTPtr & ast_function, ContextPtr context) +{ + ASTs & args_func = ast_function->children; + + if (args_func.size() != 1) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' must have arguments", getName()); + + auto args = args_func.at(0)->children; + configuration = StorageFuzzQuery::getConfiguration(args, context); +} + +StoragePtr TableFunctionFuzzQuery::executeImpl( + const ASTPtr & /*ast_function*/, + ContextPtr context, + const std::string & table_name, + ColumnsDescription /*cached_columns*/, + bool is_insert_query) const +{ + ColumnsDescription columns = getActualTableStructure(context, is_insert_query); + auto res = std::make_shared( + StorageID(getDatabaseName(), table_name), + columns, + /* comment */ String{}, + configuration); + res->startup(); + return res; +} + +void registerTableFunctionFuzzQuery(TableFunctionFactory & factory) +{ + factory.registerFunction( + {.documentation + = {.description = "Perturbs a query string with random variations.", + .returned_value = "A table object with a single column containing perturbed query strings."}, + .allow_readonly = true}); +} + +} diff --git a/src/TableFunctions/TableFunctionFuzzQuery.h b/src/TableFunctions/TableFunctionFuzzQuery.h new file mode 100644 index 00000000000..22d10341c4d --- /dev/null +++ b/src/TableFunctions/TableFunctionFuzzQuery.h @@ -0,0 +1,42 @@ +#pragma once + +#include + +#include +#include +#include + +#include "config.h" + +namespace DB +{ + +class TableFunctionFuzzQuery : public ITableFunction +{ +public: + static constexpr auto name = "fuzzQuery"; + std::string getName() const override { return name; } + + void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; + + ColumnsDescription getActualTableStructure(ContextPtr /* context */, bool /* is_insert_query */) const override + { + return ColumnsDescription{{"query", std::make_shared()}}; + } + +private: + StoragePtr executeImpl( + const ASTPtr & ast_function, + ContextPtr context, + const std::string & table_name, + ColumnsDescription cached_columns, + bool is_insert_query) const override; + + const char * getStorageTypeName() const override { return "fuzzQuery"; } + + String source; + std::optional random_seed; + StorageFuzzQuery::Configuration configuration; +}; + +} diff --git a/src/TableFunctions/TableFunctionMergeTreeIndex.cpp b/src/TableFunctions/TableFunctionMergeTreeIndex.cpp index 06a48f0e25f..27ed50fb711 100644 --- a/src/TableFunctions/TableFunctionMergeTreeIndex.cpp +++ b/src/TableFunctions/TableFunctionMergeTreeIndex.cpp @@ -76,9 +76,9 @@ void TableFunctionMergeTreeIndex::parseArguments(const ASTPtr & ast_function, Co "Table function '{}' expected bool flag for 'with_marks' argument", getName()); if (value.getType() == Field::Types::Bool) - with_marks = value.get(); + with_marks = value.safeGet(); else - with_marks = value.get(); + with_marks = value.safeGet(); } if (!params.empty()) diff --git a/src/TableFunctions/TableFunctionMongoDB.cpp b/src/TableFunctions/TableFunctionMongoDB.cpp index b2cf1b4675e..94279d1bf6d 100644 --- a/src/TableFunctions/TableFunctionMongoDB.cpp +++ b/src/TableFunctions/TableFunctionMongoDB.cpp @@ -1,5 +1,4 @@ #include -#include #include diff --git a/src/TableFunctions/TableFunctionObjectStorage.cpp b/src/TableFunctions/TableFunctionObjectStorage.cpp index 550d9cc799b..39392a4c44c 100644 --- a/src/TableFunctions/TableFunctionObjectStorage.cpp +++ b/src/TableFunctions/TableFunctionObjectStorage.cpp @@ -84,7 +84,8 @@ ColumnsDescription TableFunctionObjectStorage< context->checkAccess(getSourceAccessType()); ColumnsDescription columns; auto storage = getObjectStorage(context, !is_insert_query); - resolveSchemaAndFormat(columns, configuration->format, storage, configuration, std::nullopt, context); + std::string sample_path; + resolveSchemaAndFormat(columns, configuration->format, storage, configuration, std::nullopt, sample_path, context); return columns; } else diff --git a/src/TableFunctions/TableFunctionRedis.cpp b/src/TableFunctions/TableFunctionRedis.cpp index f87ba6d1c6d..aca751c2840 100644 --- a/src/TableFunctions/TableFunctionRedis.cpp +++ b/src/TableFunctions/TableFunctionRedis.cpp @@ -15,7 +15,6 @@ #include #include -#include namespace DB diff --git a/src/TableFunctions/TableFunctionTimeSeries.cpp b/src/TableFunctions/TableFunctionTimeSeries.cpp new file mode 100644 index 00000000000..62ea088eba0 --- /dev/null +++ b/src/TableFunctions/TableFunctionTimeSeries.cpp @@ -0,0 +1,128 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + + +template +void TableFunctionTimeSeriesTarget::parseArguments(const ASTPtr & ast_function, ContextPtr context) +{ + const auto & args_func = ast_function->as(); + + if (!args_func.arguments) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Table function '{}' must have arguments.", name); + + auto & args = args_func.arguments->children; + + if ((args.size() != 1) && (args.size() != 2)) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Table function '{}' requires one or two arguments: {}([database, ] time_series_table)", name, name); + + if (args.size() == 1) + { + /// timeSeriesMetrics( [my_db.]my_time_series_table ) + if (const auto * id = args[0]->as()) + { + if (auto table_id = id->createTable()) + time_series_storage_id = table_id->getTableId(); + } + } + + if (time_series_storage_id.empty()) + { + for (auto & arg : args) + arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); + + if (args.size() == 1) + { + /// timeSeriesMetrics( 'my_time_series_table' ) + time_series_storage_id.table_name = checkAndGetLiteralArgument(args[0], "table_name"); + } + else + { + /// timeSeriesMetrics( 'mydb', 'my_time_series_table' ) + time_series_storage_id.database_name = checkAndGetLiteralArgument(args[0], "database_name"); + time_series_storage_id.table_name = checkAndGetLiteralArgument(args[1], "table_name"); + } + } + + if (time_series_storage_id.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Couldn't get a table name from the arguments of the {} table function", name); + + time_series_storage_id = context->resolveStorageID(time_series_storage_id); + target_table_type_name = getTargetTable(context)->getName(); +} + + +template +StoragePtr TableFunctionTimeSeriesTarget::getTargetTable(const ContextPtr & context) const +{ + auto time_series_storage = storagePtrToTimeSeries(DatabaseCatalog::instance().getTable(time_series_storage_id, context)); + return time_series_storage->getTargetTable(target_kind, context); +} + + +template +StoragePtr TableFunctionTimeSeriesTarget::executeImpl( + const ASTPtr & /* ast_function */, + ContextPtr context, + const String & /* table_name */, + ColumnsDescription /* cached_columns */, + bool /* is_insert_query */) const +{ + return getTargetTable(context); +} + +template +ColumnsDescription TableFunctionTimeSeriesTarget::getActualTableStructure(ContextPtr context, bool /* is_insert_query */) const +{ + return getTargetTable(context)->getInMemoryMetadataPtr()->columns; +} + +template +const char * TableFunctionTimeSeriesTarget::getStorageTypeName() const +{ + return target_table_type_name.c_str(); +} + + +void registerTableFunctionTimeSeries(TableFunctionFactory & factory) +{ + factory.registerFunction>( + {.documentation = { + .description=R"(Provides direct access to the 'data' target table for a specified TimeSeries table.)", + .examples{{"timeSeriesData", "SELECT * from timeSeriesData('mydb', 'time_series_table');", ""}}, + .categories{"Time Series"}} + }); + factory.registerFunction>( + {.documentation = { + .description=R"(Provides direct access to the 'tags' target table for a specified TimeSeries table.)", + .examples{{"timeSeriesTags", "SELECT * from timeSeriesTags('mydb', 'time_series_table');", ""}}, + .categories{"Time Series"}} + }); + factory.registerFunction>( + {.documentation = { + .description=R"(Provides direct access to the 'metrics' target table for a specified TimeSeries table.)", + .examples{{"timeSeriesMetrics", "SELECT * from timeSeriesMetrics('mydb', 'time_series_table');", ""}}, + .categories{"Time Series"}} + }); +} + +} diff --git a/src/TableFunctions/TableFunctionTimeSeries.h b/src/TableFunctions/TableFunctionTimeSeries.h new file mode 100644 index 00000000000..57654413fe4 --- /dev/null +++ b/src/TableFunctions/TableFunctionTimeSeries.h @@ -0,0 +1,42 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ + +/// Table functions timeSeriesData('mydb', 'my_ts_table'), timeSeriesTags('mydb', 'my_ts_table'), timeSeriesMetrics('mydb', 'my_ts_table') +/// return the data table, the tags table, and the metrics table respectively associated with any TimeSeries table mydb.my_ts_table +template +class TableFunctionTimeSeriesTarget : public ITableFunction +{ +public: + static constexpr auto name = (target_kind == ViewTarget::Data) + ? "timeSeriesData" + : ((target_kind == ViewTarget::Tags) ? "timeSeriesTags" : "timeSeriesMetrics"); + + String getName() const override { return name; } + +private: + void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; + + StoragePtr executeImpl( + const ASTPtr & ast_function, + ContextPtr context, + const std::string & table_name, + ColumnsDescription cached_columns, + bool is_insert_query) const override; + + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; + const char * getStorageTypeName() const override; + + StoragePtr getTargetTable(const ContextPtr & context) const; + + StorageID time_series_storage_id = StorageID::createEmpty(); + String target_table_type_name; +}; + +} diff --git a/src/TableFunctions/registerTableFunctions.cpp b/src/TableFunctions/registerTableFunctions.cpp index ca4913898f9..0b21de00f86 100644 --- a/src/TableFunctions/registerTableFunctions.cpp +++ b/src/TableFunctions/registerTableFunctions.cpp @@ -26,6 +26,7 @@ void registerTableFunctions() registerTableFunctionMongoDB(factory); registerTableFunctionRedis(factory); registerTableFunctionMergeTreeIndex(factory); + registerTableFunctionFuzzQuery(factory); #if USE_RAPIDJSON || USE_SIMDJSON registerTableFunctionFuzzJSON(factory); #endif @@ -56,6 +57,7 @@ void registerTableFunctions() registerTableFunctionFormat(factory); registerTableFunctionExplain(factory); + registerTableFunctionTimeSeries(factory); registerTableFunctionObjectStorage(factory); registerTableFunctionObjectStorageCluster(factory); diff --git a/src/TableFunctions/registerTableFunctions.h b/src/TableFunctions/registerTableFunctions.h index efde4d6dcdc..1dd6341b67e 100644 --- a/src/TableFunctions/registerTableFunctions.h +++ b/src/TableFunctions/registerTableFunctions.h @@ -23,6 +23,7 @@ void registerTableFunctionGenerate(TableFunctionFactory & factory); void registerTableFunctionMongoDB(TableFunctionFactory & factory); void registerTableFunctionRedis(TableFunctionFactory & factory); void registerTableFunctionMergeTreeIndex(TableFunctionFactory & factory); +void registerTableFunctionFuzzQuery(TableFunctionFactory & factory); #if USE_RAPIDJSON || USE_SIMDJSON void registerTableFunctionFuzzJSON(TableFunctionFactory & factory); #endif @@ -67,6 +68,8 @@ void registerTableFunctionObjectStorage(TableFunctionFactory & factory); void registerTableFunctionObjectStorageCluster(TableFunctionFactory & factory); void registerDataLakeTableFunctions(TableFunctionFactory & factory); +void registerTableFunctionTimeSeries(TableFunctionFactory & factory); + void registerTableFunctions(); } diff --git a/src/configure_config.cmake b/src/configure_config.cmake index d22bf674df4..5a1aa179e01 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -135,6 +135,12 @@ endif() if (TARGET ch_contrib::vectorscan) set(USE_VECTORSCAN 1) endif() +if (TARGET ch_contrib::qpl) + set(USE_QPL 1) +endif() +if (TARGET ch_contrib::qatlib) + set(USE_QATLIB 1) +endif() if (TARGET ch_contrib::avrocpp) set(USE_AVRO 1) endif() @@ -158,11 +164,14 @@ endif() if (TARGET ch_contrib::bcrypt) set(USE_BCRYPT 1) endif() +if (TARGET ch_contrib::usearch) + set(USE_USEARCH 1) +endif() if (TARGET ch_contrib::ssh) set(USE_SSH 1) endif() -if (TARGET ch_contrib::fiu) - set(FIU_ENABLE 1) +if (TARGET ch_contrib::libfiu) + set(USE_LIBFIU 1) endif() if (TARGET ch_contrib::libarchive) set(USE_LIBARCHIVE 1) diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index bd92465e1aa..c8edbdc5932 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -1,4 +1,3 @@ 01624_soft_constraints -02354_vector_search_queries # Check after ConstantNode refactoring 02944_variant_as_common_type diff --git a/tests/ci/.mypy.ini b/tests/ci/.mypy.ini index f12d27979ce..ecb4aef87dd 100644 --- a/tests/ci/.mypy.ini +++ b/tests/ci/.mypy.ini @@ -15,4 +15,5 @@ warn_return_any = True no_implicit_reexport = True strict_equality = True extra_checks = True -ignore_missing_imports = True \ No newline at end of file +ignore_missing_imports = True +logging-fstring-interpolation = False \ No newline at end of file diff --git a/tests/ci/artifactory.py b/tests/ci/artifactory.py index f3d7d24f717..9457fa32ad3 100644 --- a/tests/ci/artifactory.py +++ b/tests/ci/artifactory.py @@ -143,6 +143,8 @@ class DebianArtifactory: print(f" {cmd}") Shell.check(cmd, strict=True) Shell.check("sync") + time.sleep(10) + Shell.check(f"lsof +D R2MountPoint.MOUNT_POINT", verbose=True) def test_packages(self): Shell.check("docker pull ubuntu:latest", strict=True) diff --git a/tests/ci/auto_release.py b/tests/ci/auto_release.py index 3cc88634004..89714b2fb4b 100644 --- a/tests/ci/auto_release.py +++ b/tests/ci/auto_release.py @@ -46,6 +46,7 @@ def parse_args(): MAX_NUMBER_OF_COMMITS_TO_CONSIDER_FOR_RELEASE = 5 AUTORELEASE_INFO_FILE = "/tmp/autorelease_info.json" +AUTORELEASE_MATRIX_PARAMS = "/tmp/autorelease_params.json" @dataclasses.dataclass @@ -74,6 +75,14 @@ class AutoReleaseInfo: with open(AUTORELEASE_INFO_FILE, "w", encoding="utf-8") as f: print(json.dumps(dataclasses.asdict(self), indent=2), file=f) + # dump file for GH action matrix that is similar to the file above but with dropped not ready release branches + params = dataclasses.asdict(self) + params["releases"] = [ + release for release in params["releases"] if release["ready"] + ] + with open(AUTORELEASE_MATRIX_PARAMS, "w", encoding="utf-8") as f: + print(json.dumps(params, indent=2), file=f) + @staticmethod def from_file() -> "AutoReleaseInfo": with open(AUTORELEASE_INFO_FILE, "r", encoding="utf-8") as json_file: @@ -102,7 +111,6 @@ def _prepare(token): refs = list(repo.get_git_matching_refs(f"tags/v{pr.head.ref}")) assert refs - refs.sort(key=lambda ref: ref.ref) latest_release_tag_ref = refs[-1] latest_release_tag = repo.get_git_tag(latest_release_tag_ref.object.sha) @@ -110,6 +118,10 @@ def _prepare(token): f"git rev-list --first-parent {latest_release_tag.tag}..origin/{pr.head.ref}", ).split("\n") commit_num = len(commits) + if latest_release_tag.tag.endswith("new"): + print("It's a new release branch - skip auto release for it") + continue + print( f"Previous release [{latest_release_tag.tag}] was [{commit_num}] commits ago, date [{latest_release_tag.tagger.date}]" ) @@ -133,16 +145,33 @@ def _prepare(token): commits_to_branch_head += 1 continue - commit_ci_status = CI.GH.get_commit_status_by_name( - token=token, - commit_sha=commit, - status_name=(CI.JobNames.BUILD_CHECK, "ClickHouse build check"), - ) + # TODO: switch to check if CI is entirely green + statuses = [ + CI.GH.get_commit_status_by_name( + token=token, + commit_sha=commit, + # handle old name for old releases + status_name=(CI.JobNames.BUILD_CHECK, "ClickHouse build check"), + ), + CI.GH.get_commit_status_by_name( + token=token, + commit_sha=commit, + # handle old name for old releases + status_name=CI.JobNames.STATELESS_TEST_RELEASE, + ), + CI.GH.get_commit_status_by_name( + token=token, + commit_sha=commit, + # handle old name for old releases + status_name=CI.JobNames.STATEFUL_TEST_RELEASE, + ), + ] commit_sha = commit - if commit_ci_status == SUCCESS: + if any(status == SUCCESS for status in statuses): + commit_ci_status = SUCCESS break - print(f"CI status [{commit_ci_status}] - skip") + print(f"CI status [{statuses}] - skip") commits_to_branch_head += 1 ready = False diff --git a/tests/ci/changelog.py b/tests/ci/changelog.py index 39e426945d3..554ba339892 100755 --- a/tests/ci/changelog.py +++ b/tests/ci/changelog.py @@ -19,7 +19,6 @@ from env_helper import TEMP_PATH from git_helper import git_runner, is_shallow from github_helper import GitHub, PullRequest, PullRequests, Repository from s3_helper import S3Helper -from get_robot_token import get_best_robot_token from ci_utils import Shell from version_helper import ( FILE_WITH_VERSION_PATH, @@ -115,7 +114,6 @@ def get_descriptions(prs: PullRequests) -> Dict[str, List[Description]]: # pylint: enable=protected-access if repo_name not in repos: repos[repo_name] = pr.base.repo - in_changelog = False merge_commit = pr.merge_commit_sha if merge_commit is None: logging.warning("PR %s does not have merge-commit, skipping", pr.number) @@ -173,7 +171,6 @@ def parse_args() -> argparse.Namespace: parser.add_argument( "--gh-user-or-token", help="user name or GH token to authenticate", - default=get_best_robot_token(), ) parser.add_argument( "--gh-password", @@ -291,7 +288,7 @@ def generate_description(item: PullRequest, repo: Repository) -> Optional[Descri # Normalize bug fixes if ( re.match( - r"(?i)bug\Wfix", + r".*(?i)bug\Wfix", category, ) # Map "Critical Bug Fix" to "Bug fix" category for changelog diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 8d0414ce7a8..d201b6602f5 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -50,7 +50,6 @@ from github_helper import GitHub from pr_info import PRInfo from report import ( ERROR, - FAILURE, PENDING, SUCCESS, BuildResult, @@ -62,11 +61,11 @@ from report import ( FAIL, ) from s3_helper import S3Helper -from stopwatch import Stopwatch from tee_popen import TeePopen from ci_cache import CiCache from ci_settings import CiSettings from ci_buddy import CIBuddy +from stopwatch import Stopwatch from version_helper import get_version_from_repo # pylint: disable=too-many-lines @@ -334,7 +333,10 @@ def _pre_action(s3, job_name, batch, indata, pr_info): CI.JobNames.BUILD_CHECK, ): # we might want to rerun build report job rerun_helper = RerunHelper(commit, _get_ext_check_name(job_name)) - if rerun_helper.is_already_finished_by_status(): + if ( + rerun_helper.is_already_finished_by_status() + and not Utils.is_job_triggered_manually() + ): print("WARNING: Rerunning job with GH status ") status = rerun_helper.get_finished_status() assert status @@ -345,7 +347,7 @@ def _pre_action(s3, job_name, batch, indata, pr_info): skip_status = status.state # ci cache check - if not to_be_skipped and not no_cache: + if not to_be_skipped and not no_cache and not Utils.is_job_triggered_manually(): ci_cache = CiCache(s3, indata["jobs_data"]["digests"]).update() job_config = CI.get_job_config(job_name) if ci_cache.is_successful( @@ -370,8 +372,8 @@ def _pre_action(s3, job_name, batch, indata, pr_info): # skip_status = SUCCESS already there GH.print_in_group("Commit Status Data", job_status) - # create pre report - jr = JobReport.create_pre_report(status=skip_status, job_skipped=to_be_skipped) + # create dummy report + jr = JobReport.create_dummy(status=skip_status, job_skipped=to_be_skipped) jr.dump() if not to_be_skipped: @@ -985,23 +987,26 @@ def _run_test(job_name: str, run_command: str) -> int: else: print("Use run command from the workflow") env["CHECK_NAME"] = job_name + env["MAX_RUN_TIME"] = str(timeout or 0) print(f"Going to start run command [{run_command}]") stopwatch = Stopwatch() job_log = Path(TEMP_PATH) / "job_log.txt" with TeePopen(run_command, job_log, env, timeout) as process: + print(f"Job process started, pid [{process.process.pid}]") retcode = process.wait() if retcode != 0: print(f"Run action failed for: [{job_name}] with exit code [{retcode}]") - if timeout and process.timeout_exceeded: - print(f"Timeout {timeout} exceeded, dumping the job report") - JobReport( - status=FAILURE, - description=f"Timeout {timeout} exceeded", - test_results=[TestResult.create_check_timeout_expired(timeout)], - start_time=stopwatch.start_time_str, - duration=stopwatch.duration_seconds, - additional_files=[job_log], - ).dump() + if process.timeout_exceeded: + print(f"Job timed out: [{job_name}] exit code [{retcode}]") + assert JobReport.exist(), "JobReport real or dummy must be present" + jr = JobReport.load() + if jr.dummy: + print( + f"ERROR: Run action failed with timeout and did not generate JobReport - update dummy report with execution time" + ) + jr.test_results = [TestResult.create_check_timeout_expired()] + jr.duration = stopwatch.duration_seconds + jr.additional_files += [job_log] print(f"Run action done for: [{job_name}]") return retcode @@ -1204,7 +1209,7 @@ def main() -> int: job_report ), "BUG. There must be job report either real report, or pre-report if job was killed" error_description = "" - if not job_report.pre_report: + if not job_report.dummy: # it's a real job report ch_helper = ClickHouseHelper() check_url = "" @@ -1328,10 +1333,20 @@ def main() -> int: if CI.is_test_job(args.job_name): gh = GitHub(get_best_robot_token(), per_page=100) commit = get_commit(gh, pr_info.sha) + check_url = "" + if job_report.test_results or job_report.additional_files: + check_url = upload_result_helper.upload_results( + s3, + pr_info.number, + pr_info.sha, + job_report.test_results, + job_report.additional_files, + job_report.check_name or _get_ext_check_name(args.job_name), + ) post_commit_status( commit, ERROR, - "", + check_url, "Error: " + error_description, _get_ext_check_name(args.job_name), pr_info, diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 7a19eb6f827..7b0a7850576 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -94,7 +94,8 @@ class CI: package_type="deb", static_binary_name="aarch64", additional_pkgs=True, - ) + ), + runner_type=Runners.BUILDER_ARM, ), BuildNames.PACKAGE_ASAN: CommonJobConfigs.BUILD.with_properties( build_config=BuildConfig( @@ -162,6 +163,7 @@ class CI: tidy=True, comment="clang-tidy is used for static analysis", ), + timeout=14400, ), BuildNames.BINARY_DARWIN: CommonJobConfigs.BUILD.with_properties( build_config=BuildConfig( @@ -315,6 +317,7 @@ class CI: JobNames.STATEFUL_TEST_PARALLEL_REPL_TSAN: CommonJobConfigs.STATEFUL_TEST.with_properties( required_builds=[BuildNames.PACKAGE_TSAN], random_bucket="parrepl_with_sanitizer", + timeout=3600, ), JobNames.STATELESS_TEST_ASAN: CommonJobConfigs.STATELESS_TEST.with_properties( required_builds=[BuildNames.PACKAGE_ASAN], num_batches=2 @@ -342,17 +345,17 @@ class CI: runner_type=Runners.FUNC_TESTER_ARM, ), JobNames.STATELESS_TEST_OLD_ANALYZER_S3_REPLICATED_RELEASE: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_RELEASE], num_batches=4 + required_builds=[BuildNames.PACKAGE_RELEASE], num_batches=2 ), JobNames.STATELESS_TEST_S3_DEBUG: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_DEBUG], num_batches=2 + required_builds=[BuildNames.PACKAGE_DEBUG], num_batches=1 ), JobNames.STATELESS_TEST_AZURE_ASAN: CommonJobConfigs.STATELESS_TEST.with_properties( required_builds=[BuildNames.PACKAGE_ASAN], num_batches=3, release_only=True ), JobNames.STATELESS_TEST_S3_TSAN: CommonJobConfigs.STATELESS_TEST.with_properties( required_builds=[BuildNames.PACKAGE_TSAN], - num_batches=4, + num_batches=3, ), JobNames.STRESS_TEST_DEBUG: CommonJobConfigs.STRESS_TEST.with_properties( required_builds=[BuildNames.PACKAGE_DEBUG], @@ -397,10 +400,14 @@ class CI: required_builds=[BuildNames.PACKAGE_DEBUG], pr_only=True ), JobNames.INTEGRATION_TEST_ASAN: CommonJobConfigs.INTEGRATION_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_ASAN], release_only=True, num_batches=4 + required_builds=[BuildNames.PACKAGE_ASAN], + release_only=True, + num_batches=4, + timeout=10800, ), JobNames.INTEGRATION_TEST_ASAN_OLD_ANALYZER: CommonJobConfigs.INTEGRATION_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_ASAN], num_batches=6 + required_builds=[BuildNames.PACKAGE_ASAN], + num_batches=6, ), JobNames.INTEGRATION_TEST_TSAN: CommonJobConfigs.INTEGRATION_TEST.with_properties( required_builds=[BuildNames.PACKAGE_TSAN], num_batches=6 @@ -494,9 +501,10 @@ class CI: JobNames.SQLANCER_DEBUG: CommonJobConfigs.SQLLANCER_TEST.with_properties( required_builds=[BuildNames.PACKAGE_DEBUG], ), - JobNames.SQL_LOGIC_TEST: CommonJobConfigs.SQLLOGIC_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_RELEASE], - ), + # TODO: job does not work at all, uncomment and fix + # JobNames.SQL_LOGIC_TEST: CommonJobConfigs.SQLLOGIC_TEST.with_properties( + # required_builds=[BuildNames.PACKAGE_RELEASE], + # ), JobNames.SQLTEST: CommonJobConfigs.SQL_TEST.with_properties( required_builds=[BuildNames.PACKAGE_RELEASE], ), @@ -531,7 +539,10 @@ class CI: JobNames.FAST_TEST: JobConfig( pr_only=True, digest=DigestConfig( - include_paths=["./tests/queries/0_stateless/"], + include_paths=[ + "./tests/queries/0_stateless/", + "./tests/docker_scripts/", + ], exclude_files=[".md"], docker=["clickhouse/fasttest"], ), diff --git a/tests/ci/ci_definitions.py b/tests/ci/ci_definitions.py index 48847b0d7a6..9d95a19790f 100644 --- a/tests/ci/ci_definitions.py +++ b/tests/ci/ci_definitions.py @@ -57,6 +57,7 @@ class Runners(metaclass=WithIter): """ BUILDER = "builder" + BUILDER_ARM = "builder-aarch64" STYLE_CHECKER = "style-checker" STYLE_CHECKER_ARM = "style-checker-aarch64" FUNC_TESTER = "func-tester" @@ -203,7 +204,7 @@ class JobNames(metaclass=WithIter): PERFORMANCE_TEST_AMD64 = "Performance Comparison (release)" PERFORMANCE_TEST_ARM64 = "Performance Comparison (aarch64)" - SQL_LOGIC_TEST = "Sqllogic test (release)" + # SQL_LOGIC_TEST = "Sqllogic test (release)" SQLANCER = "SQLancer (release)" SQLANCER_DEBUG = "SQLancer (debug)" @@ -331,7 +332,7 @@ class JobConfig: # will be triggered for the job if omitted in CI workflow yml run_command: str = "" # job timeout, seconds - timeout: Optional[int] = None + timeout: int = 7200 # sets number of batches for a multi-batch job num_batches: int = 1 # label that enables job in CI, if set digest isn't used @@ -414,13 +415,13 @@ class CommonJobConfigs: "./tests/clickhouse-test", "./tests/config", "./tests/*.txt", + "./tests/docker_scripts/", ], exclude_files=[".md"], docker=["clickhouse/stateless-test"], ), run_command='functional_test_check.py "$CHECK_NAME"', runner_type=Runners.FUNC_TESTER, - timeout=9000, ) STATEFUL_TEST = JobConfig( job_name_keyword="stateful", @@ -431,6 +432,7 @@ class CommonJobConfigs: "./tests/clickhouse-test", "./tests/config", "./tests/*.txt", + "./tests/docker_scripts/", ], exclude_files=[".md"], docker=["clickhouse/stateful-test"], @@ -448,6 +450,7 @@ class CommonJobConfigs: "./tests/clickhouse-test", "./tests/config", "./tests/*.txt", + "./tests/docker_scripts/", ], exclude_files=[".md"], docker=["clickhouse/stress-test"], @@ -459,12 +462,13 @@ class CommonJobConfigs: UPGRADE_TEST = JobConfig( job_name_keyword="upgrade", digest=DigestConfig( - include_paths=["./tests/ci/upgrade_check.py"], + include_paths=["./tests/ci/upgrade_check.py", "./tests/docker_scripts/"], exclude_files=[".md"], - docker=["clickhouse/upgrade-check"], + docker=["clickhouse/stress-test"], ), run_command="upgrade_check.py", runner_type=Runners.STRESS_TESTER, + timeout=3600, ) INTEGRATION_TEST = JobConfig( job_name_keyword="integration", diff --git a/tests/ci/ci_utils.py b/tests/ci/ci_utils.py index d807f5be09f..e8d9e7dc254 100644 --- a/tests/ci/ci_utils.py +++ b/tests/ci/ci_utils.py @@ -18,6 +18,7 @@ class Envs: ) S3_BUILDS_BUCKET = os.getenv("S3_BUILDS_BUCKET", "clickhouse-builds") GITHUB_WORKFLOW = os.getenv("GITHUB_WORKFLOW", "") + GITHUB_ACTOR = os.getenv("GITHUB_ACTOR", "") class WithIter(type): @@ -102,21 +103,29 @@ class GH: assert len(commit_sha) == 40 assert Utils.is_hex(commit_sha) assert not Utils.is_hex(token) - url = f"https://api.github.com/repos/{Envs.GITHUB_REPOSITORY}/commits/{commit_sha}/statuses?per_page={200}" + + url = f"https://api.github.com/repos/{Envs.GITHUB_REPOSITORY}/commits/{commit_sha}/statuses" headers = { "Authorization": f"token {token}", "Accept": "application/vnd.github.v3+json", } - response = requests.get(url, headers=headers, timeout=5) if isinstance(status_name, str): status_name = (status_name,) - if response.status_code == 200: - assert "next" not in response.links, "Response truncated" - statuses = response.json() - for status in statuses: - if status["context"] in status_name: - return status["state"] # type: ignore + + while url: + response = requests.get(url, headers=headers, timeout=5) + if response.status_code == 200: + statuses = response.json() + for status in statuses: + if status["context"] in status_name: + return status["state"] # type: ignore + + # Check if there is a next page + url = response.links.get("next", {}).get("url") + else: + break + return "" @staticmethod @@ -167,6 +176,11 @@ class GH: latest_branch = Shell.get_output( 'gh pr list --label release --repo ClickHouse/ClickHouse --search "sort:created" -L1 --json headRefName' ) + if latest_branch: + latest_branch = json.loads(latest_branch)[0]["headRefName"] + print( + f"Latest branch [{latest_branch}], release branch [{branch}], release latest [{latest_branch == branch}]" + ) return latest_branch == branch @@ -269,3 +283,7 @@ class Utils: ): res = res.replace(*r) return res + + @staticmethod + def is_job_triggered_manually(): + return "robot" not in Envs.GITHUB_ACTOR diff --git a/tests/ci/create_release.py b/tests/ci/create_release.py index 27eba273ce0..68268b033fe 100755 --- a/tests/ci/create_release.py +++ b/tests/ci/create_release.py @@ -61,6 +61,7 @@ class ReleaseContextManager: # create initial release info self.release_info = ReleaseInfo( release_branch="NA", + release_type="NA", commit_sha=args.ref, release_tag="NA", version="NA", @@ -93,6 +94,7 @@ class ReleaseContextManager: @dataclasses.dataclass class ReleaseInfo: version: str + release_type: str release_tag: str release_branch: str commit_sha: str @@ -131,7 +133,7 @@ class ReleaseInfo: return self def prepare( - self, commit_ref: str, release_type: str, skip_tag_check: bool + self, commit_ref: str, release_type: str, _skip_tag_check: bool ) -> "ReleaseInfo": version = None release_branch = None @@ -143,17 +145,18 @@ class ReleaseInfo: assert release_type in ("patch", "new") if release_type == "new": # check commit_ref is right and on a right branch - Shell.check( - f"git merge-base --is-ancestor {commit_ref} origin/master", - strict=True, - verbose=True, - ) + if commit_ref != "master": + Shell.check( + f"git merge-base --is-ancestor {commit_ref} origin/master", + strict=True, + verbose=True, + ) with checkout(commit_ref): commit_sha = Shell.get_output_or_raise(f"git rev-list -n1 {commit_ref}") # Git() must be inside "with checkout" contextmanager git = Git() version = get_version_from_repo(git=git) - release_branch = "master" + release_branch = f"{version.major}.{version.minor}" expected_prev_tag = f"v{version.major}.{version.minor}.1.1-new" version.bump().with_description(VersionType.NEW) assert ( @@ -204,10 +207,11 @@ class ReleaseInfo: expected_tag_prefix ) and git.latest_tag.endswith(expected_tag_suffix): pass - elif not skip_tag_check: - assert ( - False - ), f"BUG: Unexpected latest tag [{git.latest_tag}] expected [{expected_tag_prefix}*{expected_tag_suffix}]. Already Released?" + # TODO: uncomment and check with dry-run + # elif not skip_tag_check: + # assert ( + # False + # ), f"BUG: Unexpected latest tag [{git.latest_tag}] expected [{expected_tag_prefix}*{expected_tag_suffix}]. Already Released?" previous_release_sha = Shell.get_output_or_raise( f"git rev-list -n1 {previous_release_tag}" @@ -238,6 +242,7 @@ class ReleaseInfo: self.release_progress = ReleaseProgress.STARTED self.progress_status = ReleaseProgressDescription.OK self.latest = latest_release + self.release_type = release_type return self def push_release_tag(self, dry_run: bool) -> None: @@ -262,16 +267,15 @@ class ReleaseInfo: @staticmethod def _create_gh_label(label: str, color_hex: str, dry_run: bool) -> None: cmd = f"gh api repos/{CI.Envs.GITHUB_REPOSITORY}/labels -f name={label} -f color={color_hex}" - Shell.check(cmd, dry_run=dry_run, strict=True) + res = Shell.check(cmd, dry_run=dry_run, verbose=True) + if not res: + # not a critical error - do not fail. branch might be created already (recovery case) + print("WARNING: failed to create backport labels for the new branch") def push_new_release_branch(self, dry_run: bool) -> None: - assert ( - self.release_branch == "master" - ), "New release branch can be created only for release type [new]" git = Git() version = get_version_from_repo(git=git) - new_release_branch = f"{version.major}.{version.minor}" - stable_release_type = version.get_stable_release_type() + new_release_branch = self.release_branch version_after_release = copy(version) version_after_release.bump() assert ( @@ -285,11 +289,8 @@ class ReleaseInfo: print( f"Create and push new release branch [{new_release_branch}], commit [{self.commit_sha}]" ) - with checkout(self.release_branch): + with checkout("master"): with checkout_new(new_release_branch): - pr_labels = f"--label {CI.Labels.RELEASE}" - if stable_release_type == VersionType.LTS: - pr_labels += f" --label {CI.Labels.RELEASE_LTS}" cmd_push_branch = ( f"{GIT_PREFIX} push --set-upstream origin {new_release_branch}" ) @@ -302,67 +303,108 @@ class ReleaseInfo: ReleaseInfo._create_gh_label( f"v{new_release_branch}-affected", "c2bfff", dry_run=dry_run ) - Shell.check( - f"""gh pr create --repo {CI.Envs.GITHUB_REPOSITORY} --title 'Release pull request for branch {new_release_branch}' - --head {new_release_branch} {pr_labels} - --body 'This PullRequest is a part of ClickHouse release cycle. It is used by CI system only. Do not perform any changes with it.' - """, - dry_run=dry_run, - strict=True, - verbose=True, - ) def get_version_bump_branch(self): return f"bump_version_{self.version}" def update_version_and_contributors_list(self, dry_run: bool) -> None: - # Bump version, update contributors list, create PR - branch_upd_version_contributors = self.get_version_bump_branch() + # Bump version, update contributors list, create on release branch with checkout(self.commit_sha): git = Git() version = get_version_from_repo(git=git) - if self.release_branch == "master": + if self.release_type == "patch": + assert ( + version.string == self.version + ), f"BUG: version in release info does not match version in git commit, expected [{self.version}], got [{version.string}]" + version.bump_patch() + else: + version.reset_tweak() + version.with_description(version.get_stable_release_type()) + + with checkout(self.release_branch): + update_cmake_version(version) + update_contributors(raise_error=True) + cmd_commit_version_upd = f"{GIT_PREFIX} commit '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}' -m 'Update autogenerated version to {self.version} and contributors'" + cmd_push_branch = f"{GIT_PREFIX} push" + Shell.check( + cmd_commit_version_upd, strict=True, dry_run=dry_run, verbose=True + ) + Shell.check(cmd_push_branch, strict=True, dry_run=dry_run, verbose=True) + if dry_run: + Shell.check( + f"{GIT_PREFIX} diff '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'", + verbose=True, + ) + Shell.check( + f"{GIT_PREFIX} checkout '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'", + verbose=True, + ) + + # TODO: move to new GH step? + if self.release_type == "new": + print("Update version on master branch") + branch_upd_version_contributors = self.get_version_bump_branch() + with checkout(self.commit_sha): + git = Git() + version = get_version_from_repo(git=git) version.bump() version.with_description(VersionType.TESTING) - else: - version.with_description(version.get_stable_release_type()) - assert ( - version.string == self.version - ), f"BUG: version in release info does not match version in git commit, expected [{self.version}], got [{version.string}]" - with checkout(self.release_branch): - with checkout_new(branch_upd_version_contributors): - update_cmake_version(version) - update_contributors(raise_error=True) - cmd_commit_version_upd = f"{GIT_PREFIX} commit '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}' -m 'Update autogenerated version to {self.version} and contributors'" - cmd_push_branch = f"{GIT_PREFIX} push --set-upstream origin {branch_upd_version_contributors}" - actor = os.getenv("GITHUB_ACTOR", "") or "me" - body = f"Automatic version bump after release {self.release_tag}\n### Changelog category (leave one):\n- Not for changelog (changelog entry is not required)\n" - cmd_create_pr = f"gh pr create --repo {CI.Envs.GITHUB_REPOSITORY} --title 'Update version after release' --head {branch_upd_version_contributors} --base {self.release_branch} --body \"{body}\" --assignee {actor}" + with checkout("master"): + with checkout_new(branch_upd_version_contributors): + update_cmake_version(version) + update_contributors(raise_error=True) + cmd_commit_version_upd = f"{GIT_PREFIX} commit '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}' -m 'Update autogenerated version to {self.version} and contributors'" + cmd_push_branch = f"{GIT_PREFIX} push --set-upstream origin {branch_upd_version_contributors}" + actor = os.getenv("GITHUB_ACTOR", "") or "me" + body = f"Automatic version bump after release {self.release_tag}\n### Changelog category (leave one):\n- Not for changelog (changelog entry is not required)\n" + cmd_create_pr = f"gh pr create --repo {CI.Envs.GITHUB_REPOSITORY} --title 'Update version after release' --head {branch_upd_version_contributors} --base master --body \"{body}\" --assignee {actor}" + Shell.check( + cmd_commit_version_upd, + strict=True, + dry_run=dry_run, + verbose=True, + ) + Shell.check( + cmd_push_branch, strict=True, dry_run=dry_run, verbose=True + ) + Shell.check( + cmd_create_pr, strict=True, dry_run=dry_run, verbose=True + ) + if dry_run: + Shell.check( + f"{GIT_PREFIX} diff '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'", + verbose=True, + ) + Shell.check( + f"{GIT_PREFIX} checkout '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'", + verbose=True, + ) + self.version_bump_pr = "dry-run" + else: + self.version_bump_pr = GH.get_pr_url_by_branch( + branch=branch_upd_version_contributors + ) + + # TODO: move to new GH step? + print("Create Release PR") + with checkout(self.release_branch): + pr_labels = f"--label {CI.Labels.RELEASE}" + if version.get_stable_release_type() == VersionType.LTS: + pr_labels += f" --label {CI.Labels.RELEASE_LTS}" Shell.check( - cmd_commit_version_upd, strict=True, dry_run=dry_run, verbose=True + f"""gh pr create --repo {CI.Envs.GITHUB_REPOSITORY} --title 'Release pull request for branch {self.release_branch}' \ + --head {self.release_branch} {pr_labels} \ + --body 'This PullRequest is a part of ClickHouse release cycle. It is used by CI system only. Do not perform any changes with it.'""", + dry_run=dry_run, + strict=True, + verbose=True, ) - Shell.check(cmd_push_branch, strict=True, dry_run=dry_run, verbose=True) - Shell.check(cmd_create_pr, strict=True, dry_run=dry_run, verbose=True) - if dry_run: - Shell.check( - f"{GIT_PREFIX} diff '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'", - verbose=True, - ) - Shell.check( - f"{GIT_PREFIX} checkout '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'", - verbose=True, - ) - self.version_bump_pr = "dry-run" - else: - self.version_bump_pr = GH.get_pr_url_by_branch( - branch=branch_upd_version_contributors - ) def get_change_log_branch(self): return f"auto/{self.release_tag}" def update_release_info(self, dry_run: bool) -> "ReleaseInfo": - if self.release_branch != "master": + if self.release_type == "patch": if not self.changelog_pr: branch = self.get_change_log_branch() if not dry_run: @@ -371,21 +413,22 @@ class ReleaseInfo: url = "dry-run" print(f"ChangeLog PR url [{url}]") self.changelog_pr = url - - if not self.version_bump_pr: - branch = self.get_version_bump_branch() - if not dry_run: - url = GH.get_pr_url_by_branch(branch=branch) - else: - url = "dry-run" - print(f"Version bump PR url [{url}]") - self.version_bump_pr = url - - self.release_url = f"https://github.com/{CI.Envs.GITHUB_REPOSITORY}/releases/tag/{self.release_tag}" - print(f"Release url [{self.release_url}]") - self.docker = f"docker run --rm clickhouse/clickhouse:{self.version} clickhouse --version" + else: + # new release branch - find version bump pr on a master branch + branch = self.get_version_bump_branch() + if not dry_run: + url = GH.get_pr_url_by_branch(branch=branch) + else: + url = "dry-run" + print(f"Version bump PR url [{url}]") + self.version_bump_pr = url + + self.release_url = f"https://github.com/{CI.Envs.GITHUB_REPOSITORY}/releases/tag/{self.release_tag}" + print(f"Release url [{self.release_url}]") + self.dump() + return self def create_gh_release(self, packages_files: List[str], dry_run: bool) -> None: @@ -410,35 +453,40 @@ class ReleaseInfo: def merge_prs(self, dry_run: bool) -> None: repo = CI.Envs.GITHUB_REPOSITORY - assert self.version_bump_pr - if dry_run: - version_bump_pr_num = 12345 - else: - version_bump_pr_num = int(self.version_bump_pr.split("/")[-1]) - print("Merging Version bump PR") - res_1 = Shell.check( - f"gh pr merge {version_bump_pr_num} --repo {repo} --merge --auto", - verbose=True, - dry_run=dry_run, - ) - - res_2 = True - if not self.release_tag.endswith("-new"): + if self.release_type == "patch": assert self.changelog_pr print("Merging ChangeLog PR") if dry_run: changelog_pr_num = 23456 else: changelog_pr_num = int(self.changelog_pr.split("/")[-1]) - res_2 = Shell.check( + res = Shell.check( f"gh pr merge {changelog_pr_num} --repo {repo} --merge --auto", verbose=True, dry_run=dry_run, ) else: - assert not self.changelog_pr + if not dry_run: + assert not self.changelog_pr + res = True - self.prs_merged = res_1 and res_2 + if self.release_type == "new": + assert self.version_bump_pr + print("Merging Version Bump PR") + if dry_run: + version_bump_pr = 23456 + else: + version_bump_pr = int(self.version_bump_pr.split("/")[-1]) + res = res and Shell.check( + f"gh pr merge {version_bump_pr} --repo {repo} --merge --auto", + verbose=True, + dry_run=dry_run, + ) + else: + if not dry_run: + assert not self.version_bump_pr + + self.prs_merged = res class RepoTypes: @@ -759,7 +807,7 @@ if __name__ == "__main__": release_info.prepare( commit_ref=args.ref, release_type=args.release_type, - skip_tag_check=args.skip_tag_check, + _skip_tag_check=args.skip_tag_check, ) if args.download_packages: diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 786a529e0a9..c8dbcd10245 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -93,7 +93,7 @@ def process_single_image( results = [] # type: TestResults for ver in versions: stopwatch = Stopwatch() - for i in range(5): + for i in range(2): success, build_log = build_and_push_one_image( image, ver, additional_cache, push, from_tag ) diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index 3251ec5644e..34439c19f0a 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -70,7 +70,7 @@ def parse_args() -> argparse.Namespace: parser.add_argument( "--tag-type", type=str, - choices=("head", "release", "latest-release"), + choices=("head", "release", "release-latest"), default="head", help="defines required tags for resulting docker image. " "head - for master image (tag: head) " diff --git a/tests/ci/fast_test_check.py b/tests/ci/fast_test_check.py index ed727dd3659..55eefcf9714 100644 --- a/tests/ci/fast_test_check.py +++ b/tests/ci/fast_test_check.py @@ -31,15 +31,14 @@ def get_fasttest_cmd( "--security-opt seccomp=unconfined " # required to issue io_uring sys-calls "--network=host " # required to get access to IAM credentials f"-e FASTTEST_WORKSPACE=/fasttest-workspace -e FASTTEST_OUTPUT=/test_output " - f"-e FASTTEST_SOURCE=/ClickHouse " + f"-e FASTTEST_SOURCE=/repo " f"-e FASTTEST_CMAKE_FLAGS='-DCOMPILER_CACHE=sccache' " f"-e PULL_REQUEST_NUMBER={pr_number} -e COMMIT_SHA={commit_sha} " f"-e COPY_CLICKHOUSE_BINARY_TO_OUTPUT=1 " f"-e SCCACHE_BUCKET={S3_BUILDS_BUCKET} -e SCCACHE_S3_KEY_PREFIX=ccache/sccache " "-e stage=clone_submodules " - f"--volume={workspace}:/fasttest-workspace --volume={repo_path}:/ClickHouse " - f"--volume={repo_path}/tests/analyzer_tech_debt.txt:/analyzer_tech_debt.txt " - f"--volume={output_path}:/test_output {image}" + f"--volume={workspace}:/fasttest-workspace --volume={repo_path}:/repo " + f"--volume={output_path}:/test_output {image} /repo/tests/docker_scripts/fasttest_runner.sh" ) diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index 52970404d2d..ce2ead59d1a 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -5,10 +5,11 @@ import csv import logging import os import re +import signal import subprocess import sys from pathlib import Path -from typing import List, Tuple +from typing import List, Tuple, Optional from build_download_helper import download_all_deb_packages from clickhouse_helper import CiLogsCredentials @@ -25,11 +26,12 @@ from report import ( TestResults, read_test_results, FAILURE, + TestResult, ) from stopwatch import Stopwatch from tee_popen import TeePopen from ci_config import CI -from ci_utils import Utils +from ci_utils import Utils, Shell NO_CHANGES_MSG = "Nothing to run" @@ -113,29 +115,28 @@ def get_run_command( if flaky_check: envs.append("-e NUM_TRIES=50") - envs.append("-e MAX_RUN_TIME=2800") envs += [f"-e {e}" for e in additional_envs] env_str = " ".join(envs) - volume_with_broken_test = ( - f"--volume={repo_path}/tests/analyzer_tech_debt.txt:/analyzer_tech_debt.txt " - if "analyzer" not in check_name - else "" - ) + + if "stateful" in check_name.lower(): + run_script = "/repo/tests/docker_scripts/stateful_runner.sh" + elif "stateless" in check_name.lower(): + run_script = "/repo/tests/docker_scripts/stateless_runner.sh" + else: + assert False return ( - f"docker run --volume={builds_path}:/package_folder " + f"docker run --rm --name func-tester --volume={builds_path}:/package_folder " # For dmesg and sysctl "--privileged " - f"{ci_logs_args}" - f"--volume={repo_path}/tests:/usr/share/clickhouse-test " - f"--volume={repo_path}/utils/grpc-client:/usr/share/clickhouse-utils/grpc-client " - f"{volume_with_broken_test}" + f"{ci_logs_args} " + f"--volume={repo_path}:/repo " f"--volume={result_path}:/test_output " f"--volume={server_log_path}:/var/log/clickhouse-server " "--security-opt seccomp=unconfined " # required to issue io_uring sys-calls - f"--cap-add=SYS_PTRACE {env_str} {additional_options_str} {image}" + f"--cap-add=SYS_PTRACE {env_str} {additional_options_str} {image} {run_script}" ) @@ -195,7 +196,7 @@ def process_results( state, description = status[0][0], status[0][1] if ret_code != 0: state = ERROR - description += " (but script exited with an error)" + description = f"Job failed, exit code: {ret_code}. " + description try: results_path = result_directory / "test_results.tsv" @@ -237,7 +238,19 @@ def parse_args(): return parser.parse_args() +test_process = None # type: Optional[TeePopen] +timeout_expired = False + + +def handle_sigterm(signum, _frame): + print(f"WARNING: Received signal {signum}") + global timeout_expired + timeout_expired = True + Shell.check(f"docker exec func-tester pkill -f clickhouse-test", verbose=True) + + def main(): + signal.signal(signal.SIGTERM, handle_sigterm) logging.basicConfig(level=logging.INFO) for handler in logging.root.handlers: # pylint: disable=protected-access @@ -325,11 +338,13 @@ def main(): logging.info("Going to run func tests: %s", run_command) with TeePopen(run_command, run_log_path) as process: + global test_process + test_process = process retcode = process.wait() if retcode == 0: logging.info("Run successfully") else: - logging.info("Run failed") + logging.info("Run failed, exit code %s", retcode) try: subprocess.check_call( @@ -345,6 +360,13 @@ def main(): state, description, test_results, additional_logs = process_results( retcode, result_path, server_log_path ) + if timeout_expired: + description = "Timeout expired" + state = FAILURE + test_results.insert( + 0, TestResult.create_check_timeout_expired(stopwatch.duration_seconds) + ) + else: print( "This is validate bugfix or flaky check run, but no changes test to run - skip with success" diff --git a/tests/ci/integration_tests_runner.py b/tests/ci/integration_tests_runner.py index 84718462ab5..c3b71b85022 100755 --- a/tests/ci/integration_tests_runner.py +++ b/tests/ci/integration_tests_runner.py @@ -9,6 +9,7 @@ import random import re import shlex import shutil +import signal import string import subprocess import sys @@ -16,11 +17,13 @@ import time import zlib # for crc32 from collections import defaultdict from itertools import chain -from typing import Any, Dict +from typing import Any, Dict, Optional from env_helper import IS_CI from integration_test_images import IMAGES from tee_popen import TeePopen +from report import JOB_TIMEOUT_TEST_NAME +from stopwatch import Stopwatch MAX_RETRY = 1 NUM_WORKERS = 5 @@ -69,9 +72,9 @@ def get_changed_tests_to_run(pr_info, repo_path): return [] for fpath in changed_files: - if "tests/integration/test_" in fpath: + if re.search(r"tests/integration/test_.*/test.*\.py", fpath) is not None: logging.info("File %s changed and seems like integration test", fpath) - result.add(fpath.split("/")[2]) + result.add("/".join(fpath.split("/")[2:])) return filter_existing_tests(result, repo_path) @@ -621,6 +624,9 @@ class ClickhouseIntegrationTestsRunner: test_data_dirs = {} for i in range(num_tries): + if timeout_expired: + print("Timeout expired - break test group execution") + break logging.info("Running test group %s for the %s retry", test_group, i) clear_ip_tables_and_restart_daemons() @@ -657,6 +663,8 @@ class ClickhouseIntegrationTestsRunner: logging.info("Executing cmd: %s", cmd) # ignore retcode, since it meaningful due to pipe to tee with subprocess.Popen(cmd, shell=True, stderr=log, stdout=log) as proc: + global runner_subprocess + runner_subprocess = proc proc.wait() extra_logs_names = [log_basename] @@ -780,6 +788,9 @@ class ClickhouseIntegrationTestsRunner: logs = [] tries_num = 1 if should_fail else FLAKY_TRIES_COUNT for i in range(tries_num): + if timeout_expired: + print("Timeout expired - break flaky check execution") + break final_retry += 1 logging.info("Running tests for the %s time", i) counters, tests_times, log_paths = self.try_run_test_group( @@ -839,6 +850,7 @@ class ClickhouseIntegrationTestsRunner: return result_state, status_text, test_result, logs def run_impl(self, repo_path, build_path): + stopwatch = Stopwatch() if self.flaky_check or self.bugfix_validate_check: return self.run_flaky_check( repo_path, build_path, should_fail=self.bugfix_validate_check @@ -921,6 +933,9 @@ class ClickhouseIntegrationTestsRunner: random.shuffle(items_to_run) for group, tests in items_to_run: + if timeout_expired: + print("Timeout expired - break tests execution") + break logging.info("Running test group %s containing %s tests", group, len(tests)) group_counters, group_test_times, log_paths = self.try_run_test_group( repo_path, group, tests, MAX_RETRY, NUM_WORKERS, 0 @@ -981,6 +996,17 @@ class ClickhouseIntegrationTestsRunner: status_text = "Timeout, " + status_text result_state = "failure" + if timeout_expired: + logging.error( + "Job killed by external timeout signal - setting status to failure!" + ) + status_text = "Job timeout expired, " + status_text + result_state = "failure" + # add mock test case to make timeout visible in job report and in ci db + test_result.insert( + 0, (JOB_TIMEOUT_TEST_NAME, "FAIL", f"{stopwatch.duration_seconds}", "") + ) + if not counters or sum(len(counter) for counter in counters.values()) == 0: status_text = "No tests found for some reason! It's a bug" result_state = "failure" @@ -1001,6 +1027,7 @@ def write_results(results_file, status_file, results, status): def run(): + signal.signal(signal.SIGTERM, handle_sigterm) logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") repo_path = os.environ.get("CLICKHOUSE_TESTS_REPO_PATH") @@ -1035,5 +1062,17 @@ def run(): logging.info("Result written") +timeout_expired = False +runner_subprocess = None # type:Optional[subprocess.Popen] + + +def handle_sigterm(signum, _frame): + print(f"WARNING: Received signal {signum}") + global timeout_expired + timeout_expired = True + if runner_subprocess: + runner_subprocess.send_signal(signal.SIGTERM) + + if __name__ == "__main__": run() diff --git a/tests/ci/libfuzzer_test_check.py b/tests/ci/libfuzzer_test_check.py index d9e33229932..8f19dd7d023 100644 --- a/tests/ci/libfuzzer_test_check.py +++ b/tests/ci/libfuzzer_test_check.py @@ -75,7 +75,7 @@ def get_run_command( f"--volume={result_path}:/test_output " "--security-opt seccomp=unconfined " # required to issue io_uring sys-calls f"--cap-add=SYS_PTRACE {env_str} {additional_options_str} {image} " - "python3 ./utils/runner.py" + "python3 /usr/share/clickhouse-test/fuzz/runner.py" ) diff --git a/tests/ci/report.py b/tests/ci/report.py index 0b6c818aed0..a1b25b994c7 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -249,6 +249,7 @@ JOB_REPORT_FILE = Path(GITHUB_WORKSPACE) / "job_report.json" JOB_STARTED_TEST_NAME = "STARTED" JOB_FINISHED_TEST_NAME = "COMPLETED" +JOB_TIMEOUT_TEST_NAME = "Job Timeout Expired" @dataclass @@ -277,8 +278,8 @@ class TestResult: self.log_files.append(log_path) @staticmethod - def create_check_timeout_expired(timeout: float) -> "TestResult": - return TestResult("Check timeout expired", "FAIL", timeout) + def create_check_timeout_expired(duration: Optional[float] = None) -> "TestResult": + return TestResult(JOB_TIMEOUT_TEST_NAME, "FAIL", time=duration) TestResults = List[TestResult] @@ -303,7 +304,7 @@ class JobReport: # indicates that this is not real job report but report for the job that was skipped by rerun check job_skipped: bool = False # indicates that report generated by CI script in order to check later if job was killed before real report is generated - pre_report: bool = False + dummy: bool = False exit_code: int = -1 @staticmethod @@ -311,7 +312,7 @@ class JobReport: return datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S") @classmethod - def create_pre_report(cls, status: str, job_skipped: bool) -> "JobReport": + def create_dummy(cls, status: str, job_skipped: bool) -> "JobReport": return JobReport( status=status, description="", @@ -320,7 +321,7 @@ class JobReport: duration=0.0, additional_files=[], job_skipped=job_skipped, - pre_report=True, + dummy=True, ) def update_duration(self): @@ -738,13 +739,24 @@ def create_test_html_report( if test_results: rows_part = [] num_fails = 0 - has_test_time = False + has_test_time = any(tr.time is not None for tr in test_results) has_log_urls = False - # Display entires with logs at the top (they correspond to failed tests) - test_results.sort( - key=lambda result: result.raw_logs is None and result.log_files is None - ) + def sort_key(status): + if "fail" in status.lower(): + return 0 + elif "error" in status.lower(): + return 1 + elif "not" in status.lower(): + return 2 + elif "ok" in status.lower(): + return 10 + elif "success" in status.lower(): + return 9 + else: + return 5 + + test_results.sort(key=lambda result: sort_key(result.status)) for test_result in test_results: colspan = 0 @@ -770,9 +782,11 @@ def create_test_html_report( row.append(f'') colspan += 1 - if test_result.time is not None: - has_test_time = True - row.append(f"") + if has_test_time: + if test_result.time is not None: + row.append(f"") + else: + row.append("") colspan += 1 if test_result.log_urls is not None: diff --git a/tests/ci/sqllogic_test.py b/tests/ci/sqllogic_test.py index 63880f07e92..7fe44c235c7 100755 --- a/tests/ci/sqllogic_test.py +++ b/tests/ci/sqllogic_test.py @@ -31,7 +31,7 @@ IMAGE_NAME = "clickhouse/sqllogic-test" def get_run_command( builds_path: Path, - repo_tests_path: Path, + repo_path: Path, result_path: Path, server_log_path: Path, image: DockerImage, @@ -39,11 +39,11 @@ def get_run_command( return ( f"docker run " f"--volume={builds_path}:/package_folder " - f"--volume={repo_tests_path}:/clickhouse-tests " + f"--volume={repo_path}:/repo " f"--volume={result_path}:/test_output " f"--volume={server_log_path}:/var/log/clickhouse-server " "--security-opt seccomp=unconfined " # required to issue io_uring sys-calls - f"--cap-add=SYS_PTRACE {image}" + f"--cap-add=SYS_PTRACE {image} /repo/tests/docker_scripts/sqllogic_runner.sh" ) @@ -94,8 +94,6 @@ def main(): docker_image = pull_image(get_docker_image(IMAGE_NAME)) - repo_tests_path = repo_path / "tests" - packages_path = temp_path / "packages" packages_path.mkdir(parents=True, exist_ok=True) @@ -111,7 +109,7 @@ def main(): run_command = get_run_command( # run script inside docker packages_path, - repo_tests_path, + repo_path, result_path, server_log_path, docker_image, diff --git a/tests/ci/stress_check.py b/tests/ci/stress_check.py index 85da601e379..f9656e60448 100644 --- a/tests/ci/stress_check.py +++ b/tests/ci/stress_check.py @@ -57,10 +57,16 @@ def get_run_command( additional_envs: List[str], ci_logs_args: str, image: DockerImage, + upgrade_check: bool, ) -> str: envs = [f"-e {e}" for e in additional_envs] env_str = " ".join(envs) + if upgrade_check: + run_script = "/repo/tests/docker_scripts/upgrade_runner.sh" + else: + run_script = "/repo/tests/docker_scripts/stress_runner.sh" + cmd = ( "docker run --cap-add=SYS_PTRACE " # For dmesg and sysctl @@ -70,8 +76,8 @@ def get_run_command( f"{ci_logs_args}" f"--volume={build_path}:/package_folder " f"--volume={result_path}:/test_output " - f"--volume={repo_tests_path}:/usr/share/clickhouse-test " - f"--volume={server_log_path}:/var/log/clickhouse-server {env_str} {image} " + f"--volume={repo_tests_path}/..:/repo " + f"--volume={server_log_path}:/var/log/clickhouse-server {env_str} {image} {run_script}" ) return cmd @@ -128,7 +134,7 @@ def process_results( return state, description, test_results, additional_files -def run_stress_test(docker_image_name: str) -> None: +def run_stress_test(upgrade_check: bool = False) -> None: logging.basicConfig(level=logging.INFO) for handler in logging.root.handlers: # pylint: disable=protected-access @@ -148,7 +154,7 @@ def run_stress_test(docker_image_name: str) -> None: pr_info = PRInfo() - docker_image = pull_image(get_docker_image(docker_image_name)) + docker_image = pull_image(get_docker_image("clickhouse/stress-test")) packages_path = temp_path / "packages" packages_path.mkdir(parents=True, exist_ok=True) @@ -177,6 +183,7 @@ def run_stress_test(docker_image_name: str) -> None: additional_envs, ci_logs_args, docker_image, + upgrade_check, ) logging.info("Going to run stress test: %s", run_command) @@ -208,4 +215,4 @@ def run_stress_test(docker_image_name: str) -> None: if __name__ == "__main__": - run_stress_test("clickhouse/stress-test") + run_stress_test() diff --git a/tests/ci/tee_popen.py b/tests/ci/tee_popen.py index 13db50df53f..53b0a0f6c2c 100644 --- a/tests/ci/tee_popen.py +++ b/tests/ci/tee_popen.py @@ -2,6 +2,7 @@ import logging import os +import signal import sys from io import TextIOWrapper from pathlib import Path @@ -30,20 +31,34 @@ class TeePopen: self._process = None # type: Optional[Popen] self.timeout = timeout self.timeout_exceeded = False + self.terminated_by_sigterm = False + self.terminated_by_sigkill = False def _check_timeout(self) -> None: if self.timeout is None: return sleep(self.timeout) + logging.warning( + "Timeout exceeded. Send SIGTERM to process %s, timeout %s", + self.process.pid, + self.timeout, + ) + self.send_signal(signal.SIGTERM) + time_wait = 0 + self.terminated_by_sigterm = True self.timeout_exceeded = True + while self.process.poll() is None and time_wait < 100: + print("wait...") + wait = 5 + sleep(wait) + time_wait += wait while self.process.poll() is None: - logging.warning( - "Killing process %s, timeout %s exceeded", - self.process.pid, - self.timeout, + logging.error( + "Process is still running. Send SIGKILL", ) - os.killpg(self.process.pid, 9) - sleep(10) + self.send_signal(signal.SIGKILL) + self.terminated_by_sigkill = True + sleep(5) def __enter__(self) -> "TeePopen": self.process = Popen( @@ -57,6 +72,8 @@ class TeePopen: bufsize=1, errors="backslashreplace", ) + sleep(1) + print(f"Subprocess started, pid [{self.process.pid}]") if self.timeout is not None and self.timeout > 0: t = Thread(target=self._check_timeout) t.daemon = True # does not block the program from exit @@ -85,6 +102,12 @@ class TeePopen: return self.process.wait() + def poll(self): + return self.process.poll() + + def send_signal(self, signal_num): + os.killpg(self.process.pid, signal_num) + @property def process(self) -> Popen: if self._process is not None: diff --git a/tests/ci/test_ci_config.py b/tests/ci/test_ci_config.py index 525b3bf367b..c3e55aeac06 100644 --- a/tests/ci/test_ci_config.py +++ b/tests/ci/test_ci_config.py @@ -35,10 +35,16 @@ class TestCIConfig(unittest.TestCase): f"Job [{job}] must have style-checker(-aarch64) runner", ) elif "binary_" in job.lower() or "package_" in job.lower(): - self.assertTrue( - CI.JOB_CONFIGS[job].runner_type == CI.Runners.BUILDER, - f"Job [{job}] must have [{CI.Runners.BUILDER}] runner", - ) + if job.lower() == CI.BuildNames.PACKAGE_AARCH64: + self.assertTrue( + CI.JOB_CONFIGS[job].runner_type in (CI.Runners.BUILDER_ARM,), + f"Job [{job}] must have [{CI.Runners.BUILDER_ARM}] runner", + ) + else: + self.assertTrue( + CI.JOB_CONFIGS[job].runner_type in (CI.Runners.BUILDER,), + f"Job [{job}] must have [{CI.Runners.BUILDER}] runner", + ) elif "aarch64" in job.lower(): self.assertTrue( "aarch" in CI.JOB_CONFIGS[job].runner_type, diff --git a/tests/ci/upgrade_check.py b/tests/ci/upgrade_check.py index 83b6f9e299f..8662611dffe 100644 --- a/tests/ci/upgrade_check.py +++ b/tests/ci/upgrade_check.py @@ -1,4 +1,4 @@ import stress_check if __name__ == "__main__": - stress_check.run_stress_test("clickhouse/upgrade-check") + stress_check.run_stress_test(upgrade_check=True) diff --git a/tests/ci/version_helper.py b/tests/ci/version_helper.py index 07a7a9601c0..b20b2bb25cf 100755 --- a/tests/ci/version_helper.py +++ b/tests/ci/version_helper.py @@ -85,6 +85,16 @@ class ClickHouseVersion: self._tweak = 1 return self + def bump_patch(self) -> "ClickHouseVersion": + self._revision += 1 + self._patch += 1 + self._tweak = 1 + return self + + def reset_tweak(self) -> "ClickHouseVersion": + self._tweak = 1 + return self + def major_update(self) -> "ClickHouseVersion": if self._git is not None: self._git.update() @@ -104,13 +114,6 @@ class ClickHouseVersion: self.major, self.minor, self.patch + 1, self.revision, self._git ) - def reset_tweak(self) -> "ClickHouseVersion": - if self._git is not None: - self._git.update() - return ClickHouseVersion( - self.major, self.minor, self.patch, self.revision, self._git, 1 - ) - @property def major(self) -> int: return self._major diff --git a/tests/clickhouse-test b/tests/clickhouse-test index ffdd6169777..220144fb37c 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -39,6 +39,7 @@ from errno import ESRCH from subprocess import PIPE, Popen from time import sleep, time from typing import Dict, List, Optional, Set, Tuple, Union +from ast import literal_eval as make_tuple try: import termcolor # type: ignore @@ -267,7 +268,7 @@ def clickhouse_execute_http( max_http_retries=5, retry_error_codes=False, ): - if args.secure: + if base_args.secure: client = http.client.HTTPSConnection( host=base_args.tcp_host, port=base_args.http_port, timeout=timeout ) @@ -358,14 +359,89 @@ def clickhouse_execute_json( return rows +# Should we capture client's stacktraces via SIGTSTP +CAPTURE_CLIENT_STACKTRACE = False + + +def kill_process_group(pgid): + print(f"Killing process group {pgid}") + print(f"Processes in process group {pgid}:") + print( + subprocess.check_output( + f"pgrep --pgroup {pgid} -a", shell=True, stderr=subprocess.STDOUT + ).decode("utf-8"), + end="", + ) + try: + if CAPTURE_CLIENT_STACKTRACE: + # Let's try to dump stacktrace in client (useful to catch issues there) + os.killpg(pgid, signal.SIGTSTP) + # Wait some time for clickhouse utilities to gather stacktrace + if RELEASE_NON_SANITIZED: + sleep(0.5) + else: + sleep(10) + # NOTE: this still may leave some processes, that had been + # created by timeout(1), since it also creates new process + # group. But this should not be a problem with default + # options, since the default time for each test is 10min, + # and this is way more bigger then the timeout for each + # timeout(1) invocation. + # + # But as a workaround we are sending SIGTERM first, and + # only after SIGKILL, that way timeout(1) will have an + # ability to terminate childrens (though not always since + # signals are asynchronous). + os.killpg(pgid, signal.SIGTERM) + # We need minimal delay to let processes handle SIGTERM - 0.1 (this may + # not be enough, but at least something) + sleep(0.1) + os.killpg(pgid, signal.SIGKILL) + except OSError as e: + if e.errno == ESRCH: + print(f"Got ESRCH while killing {pgid}. Ignoring.") + else: + raise + print(f"Process group {pgid} should be killed") + + +def cleanup_child_processes(pid): + pgid = os.getpgid(os.getpid()) + print(f"Child processes of {pid}:") + print( + subprocess.check_output( + f"pgrep --parent {pid} -a", shell=True, stderr=subprocess.STDOUT + ).decode("utf-8"), + end="", + ) + # Due to start_new_session=True, it is not enough to kill by PGID, we need + # to look at children processes as well. + # But we are hoping that nobody creates session in the tests (though it is + # possible via timeout(), but we are assuming that they will be killed by + # timeout). + processes = subprocess.check_output( + f"pgrep --parent {pid}", shell=True, stderr=subprocess.STDOUT + ) + processes = processes.decode("utf-8") + processes = processes.strip() + processes = processes.split("\n") + processes = map(lambda x: int(x.strip()), processes) + processes = list(processes) + for child in processes: + child_pgid = os.getpgid(child) + if child_pgid != pgid: + kill_process_group(child_pgid) + + # SIGKILL should not be sent, since this will kill the script itself + os.killpg(pgid, signal.SIGTERM) + + +# send signal to all processes in group to avoid hung check triggering +# (to avoid terminating clickhouse-test itself, the signal should be ignored) def stop_tests(): - # send signal to all processes in group to avoid hung check triggering - # (to avoid terminating clickhouse-test itself, the signal should be ignored) - print("Sending signals") signal.signal(signal.SIGTERM, signal.SIG_IGN) - os.killpg(os.getpgid(os.getpid()), signal.SIGTERM) - signal.signal(signal.SIGTERM, signal.SIG_DFL) - print("Sending signals DONE") + cleanup_child_processes(os.getpid()) + signal.signal(signal.SIGTERM, signal_handler) def get_db_engine(args, database_name): @@ -836,7 +912,6 @@ class SettingsRandomizer: "cross_join_min_bytes_to_compress": lambda: random.choice([0, 1, 100000000]), "min_external_table_block_size_bytes": lambda: random.choice([0, 1, 100000000]), "max_parsing_threads": lambda: random.choice([0, 1, 10]), - "trace_profile_events": lambda: random.randint(0, 1), "optimize_functions_to_subcolumns": lambda: random.randint(0, 1), } @@ -1070,9 +1145,24 @@ class TestCase: return description + "\n" + def apply_random_settings_limits(self, random_settings): + for setting in random_settings: + if setting in self.random_settings_limits: + min_value = self.random_settings_limits[setting][0] + if min_value and random_settings[setting] < min_value: + random_settings[setting] = min_value + max_value = self.random_settings_limits[setting][1] + if max_value and random_settings[setting] > max_value: + random_settings[setting] = max_value + def __init__(self, suite, case: str, args, is_concurrent: bool): self.case: str = case # case file name self.tags: Set[str] = suite.all_tags[case] if case in suite.all_tags else set() + self.random_settings_limits = ( + suite.all_random_settings_limits[case] + if case in suite.all_random_settings_limits + else {} + ) for tag in os.getenv("GLOBAL_TAGS", "").split(","): self.tags.add(tag.strip()) @@ -1114,11 +1204,13 @@ class TestCase: if self.randomize_settings: self.random_settings = SettingsRandomizer.get_random_settings(args) + self.apply_random_settings_limits(self.random_settings) if self.randomize_merge_tree_settings: self.merge_tree_random_settings = ( MergeTreeSettingsRandomizer.get_random_settings(args) ) + self.apply_random_settings_limits(self.merge_tree_random_settings) self.base_url_params = ( os.environ["CLICKHOUSE_URL_PARAMS"] @@ -1249,39 +1341,35 @@ class TestCase: return None - def process_result_impl( - self, proc, stdout: str, stderr: str, debug_log: str, total_time: float - ): + def process_result_impl(self, proc, total_time: float): + if proc: + if proc.returncode is None: + kill_process_group(os.getpgid(proc.pid)) + description = "" + debug_log = "" + if os.path.exists(self.testcase_args.debug_log_file): + with open(self.testcase_args.debug_log_file, "rb") as stream: + debug_log += self.testcase_args.debug_log_file + ":\n" + debug_log += str(stream.read(), errors="replace", encoding="utf-8") + debug_log += "\n" + + stdout = "" + if os.path.exists(self.stdout_file): + with open(self.stdout_file, "rb") as stdfd: + stdout = str(stdfd.read(), errors="replace", encoding="utf-8") + + stderr = "" + if os.path.exists(self.stderr_file): + with open(self.stderr_file, "rb") as stdfd: + stderr += str(stdfd.read(), errors="replace", encoding="utf-8") + if debug_log: debug_log = "\n".join(debug_log.splitlines()[:100]) if proc: if proc.returncode is None: - try: - pgid = os.getpgid(proc.pid) - # NOTE: this still may leave some processes, that had been - # created by timeout(1), since it also creates new process - # group. But this should not be a problem with default - # options, since the default time for each test is 10min, - # and this is way more bigger then the timeout for each - # timeout(1) invocation. - # - # But as a workaround we are sending SIGTERM first, and - # only after SIGKILL, that way timeout(1) will have an - # ability to terminate childrens (though not always since - # signals are asynchronous). - os.killpg(pgid, signal.SIGTERM) - # This may not be enough, but this is at least something - # (and anyway it is OK to spend 0.1 second more in case of - # test timeout). - sleep(0.1) - os.killpg(pgid, signal.SIGKILL) - except OSError as e: - if e.errno != ESRCH: - raise - if stderr: description += stderr if debug_log: @@ -1533,7 +1621,7 @@ class TestCase: def run_single_test( self, server_logs_level, client_options - ) -> Tuple[Optional[Popen], str, str, str, float]: + ) -> Tuple[Optional[Popen], float]: args = self.testcase_args client = args.testcase_client start_time = args.testcase_start_time @@ -1610,13 +1698,6 @@ class TestCase: # Whether the test timed out will be decided later pass - debug_log = "" - if os.path.exists(self.testcase_args.debug_log_file): - with open(self.testcase_args.debug_log_file, "rb") as stream: - debug_log += self.testcase_args.debug_log_file + ":\n" - debug_log += str(stream.read(), errors="replace", encoding="utf-8") - debug_log += "\n" - total_time = (datetime.now() - start_time).total_seconds() # Normalize randomized database names in stdout, stderr files. @@ -1668,17 +1749,7 @@ class TestCase: "https://localhost:8443/", ) - stdout = "" - if os.path.exists(self.stdout_file): - with open(self.stdout_file, "rb") as stdfd: - stdout = str(stdfd.read(), errors="replace", encoding="utf-8") - - stderr = "" - if os.path.exists(self.stderr_file): - with open(self.stderr_file, "rb") as stdfd: - stderr += str(stdfd.read(), errors="replace", encoding="utf-8") - - return proc, stdout, stderr, debug_log, total_time + return proc, total_time def run(self, args, suite, client_options, server_logs_level): start_time = datetime.now() @@ -1710,14 +1781,14 @@ class TestCase: if not is_valid_utf_8(self.case_file) or ( self.reference_file and not is_valid_utf_8(self.reference_file) ): - proc, stdout, stderr, debug_log, total_time = self.run_single_test( + proc, total_time = self.run_single_test( server_logs_level, client_options ) - result = self.process_result_impl( - proc, stdout, stderr, debug_log, total_time + result = self.process_result_impl(proc, total_time) + result.check_if_need_retry( + args, result.description, result.description, self.runs_count ) - result.check_if_need_retry(args, stdout, stderr, self.runs_count) # to avoid breaking CSV parser result.description = result.description.replace("\0", "") else: @@ -1735,17 +1806,16 @@ class TestCase: ): ( proc, - stdout, - stderr, - debug_log, total_time, ) = self.run_single_test(server_logs_level, client_options) - result = self.process_result_impl( - proc, stdout, stderr, debug_log, total_time - ) + result = self.process_result_impl(proc, total_time) + result.check_if_need_retry( - args, stdout, stderr, self.runs_count + args, + result.description, + result.description, + self.runs_count, ) # to avoid breaking CSV parser result.description = result.description.replace("\0", "") @@ -1910,7 +1980,9 @@ class TestSuite: return test_name @staticmethod - def read_test_tags(suite_dir: str, all_tests: List[str]) -> Dict[str, Set[str]]: + def read_test_tags_and_random_settings_limits( + suite_dir: str, all_tests: List[str] + ) -> (Dict[str, Set[str]], Dict[str, Dict[str, Tuple[int, int]]]): def get_comment_sign(filename): if filename.endswith(".sql") or filename.endswith(".sql.j2"): return "--" @@ -1935,27 +2007,58 @@ class TestSuite: tags = {tag.strip() for tag in tags} return tags - def is_shebang(line: str) -> bool: - return line.startswith("#!") + def parse_random_settings_limits_from_line( + line, comment_sign + ) -> Dict[str, Tuple[int, int]]: + if not line.startswith(comment_sign): + return {} + random_settings_limits_str = line[len(comment_sign) :].lstrip() + random_settings_limits_prefix = "Random settings limits:" + if not random_settings_limits_str.startswith(random_settings_limits_prefix): + return {} + random_settings_limits_str = random_settings_limits_str[ + len(random_settings_limits_prefix) : + ] + # limits are specified in a form 'setting1=(min, max); setting2=(min,max); ...' + random_settings_limits = {} + for setting_and_limit in random_settings_limits_str.split(";"): + setting_and_limit = setting_and_limit.split("=") + random_settings_limits[setting_and_limit[0].strip()] = make_tuple( + setting_and_limit[1] + ) + return random_settings_limits - def find_tag_line(file): - for line in file: - line = line.strip() - if line and not is_shebang(line): + def find_tag_line(lines, comment_sign): + for line in lines: + if line.startswith(comment_sign) and line[ + len(comment_sign) : + ].lstrip().startswith("Tags:"): return line return "" - def load_tags_from_file(filepath): + def find_random_settings_limits_line(lines, comment_sign): + for line in lines: + if line.startswith(comment_sign) and line[ + len(comment_sign) : + ].lstrip().startswith("Random settings limits:"): + return line + return "" + + def load_tags_and_random_settings_limits_from_file(filepath): comment_sign = get_comment_sign(filepath) need_query_params = False with open(filepath, "r", encoding="utf-8") as file: try: - tag_line = find_tag_line(file) + lines = file.readlines() + tag_line = find_tag_line(lines, comment_sign) + random_settings_limits_line = find_random_settings_limits_line( + lines, comment_sign + ) except UnicodeDecodeError: - return [] + return [], {} try: if filepath.endswith(".sql"): - for line in file: + for line in lines: if "{CLICKHOUSE_DATABASE" in line: need_query_params = True except UnicodeDecodeError: @@ -1963,18 +2066,31 @@ class TestSuite: parsed_tags = parse_tags_from_line(tag_line, comment_sign) if need_query_params: parsed_tags.add("need-query-parameters") - return parsed_tags + random_settings_limits = parse_random_settings_limits_from_line( + random_settings_limits_line, comment_sign + ) + return parsed_tags, random_settings_limits all_tags = {} + all_random_settings_limits = {} start_time = datetime.now() for test_name in all_tests: - tags = load_tags_from_file(os.path.join(suite_dir, test_name)) + ( + tags, + random_settings_limits, + ) = load_tags_and_random_settings_limits_from_file( + os.path.join(suite_dir, test_name) + ) # noqa: ignore E203 if tags: all_tags[test_name] = tags + if random_settings_limits: + all_random_settings_limits[test_name] = random_settings_limits elapsed = (datetime.now() - start_time).total_seconds() if elapsed > 1: - print(f"Tags for suite {suite_dir} read in {elapsed:.2f} seconds") - return all_tags + print( + f"Tags and random settings limits for suite {suite_dir} read in {elapsed:.2f} seconds" + ) + return all_tags, all_random_settings_limits def __init__(self, args, suite_path: str, suite_tmp_path: str, suite: str): self.args = args @@ -2004,10 +2120,16 @@ class TestSuite: self.all_tests: List[str] = self.get_tests_list( self.tests_in_suite_key_func, filter_func ) - self.all_tags: Dict[str, Set[str]] = self.read_test_tags( - self.suite_path, self.all_tests - ) + all_tags_and_random_settings_limits = ( + self.read_test_tags_and_random_settings_limits( + self.suite_path, self.all_tests + ) + ) + self.all_tags: Dict[str, Set[str]] = all_tags_and_random_settings_limits[0] + self.all_random_settings_limits: Dict[str, Dict[str, (int, int)]] = ( + all_tags_and_random_settings_limits[1] + ) self.sequential_tests = [] self.parallel_tests = [] for test_name in self.all_tests: @@ -2354,7 +2476,13 @@ class BuildFlags: POLYMORPHIC_PARTS = "polymorphic-parts" +# Release and non-sanitizer build +RELEASE_NON_SANITIZED = False + + def collect_build_flags(args): + global RELEASE_NON_SANITIZED + result = [] value = clickhouse_execute( @@ -2379,6 +2507,8 @@ def collect_build_flags(args): elif b"RelWithDebInfo" in value or b"Release" in value: result.append(BuildFlags.RELEASE) + RELEASE_NON_SANITIZED = result == [BuildFlags.RELEASE] + value = clickhouse_execute( args, "SELECT value FROM system.settings WHERE name = 'allow_deprecated_database_ordinary'", @@ -2511,12 +2641,12 @@ def do_run_tests(jobs, test_suite: TestSuite): try: clickhouse_execute( args, - query="SELECT 1 /*hang up check*/", - max_http_retries=5, - timeout=20, + query="SELECT 1 /*hung check*/", + max_http_retries=20, + timeout=10, ) except Exception: - print("Hang up check failed") + print("Hung check failed") server_died.set() if server_died.is_set(): @@ -3391,29 +3521,36 @@ def parse_args(): default="./client.fatal.log", help="Path to file for fatal logs from client", ) + parser.add_argument( + "--capture-client-stacktrace", + action="store_true", + help="Capture stacktraces from clickhouse-client/local on errors", + ) return parser.parse_args() class Terminated(KeyboardInterrupt): - pass + def __init__(self, signal): + self.signal = signal -def signal_handler(sig, frame): - raise Terminated(f"Terminated with {sig} signal") +def signal_handler(signal, frame): + raise Terminated(signal) if __name__ == "__main__": + # Move to a new process group and kill it at exit so that we don't have any + # infinite tests processes left + # (new process group is required to avoid killing some parent processes) + os.setpgid(0, 0) + stop_time = None exit_code = multiprocessing.Value("i", 0) server_died = multiprocessing.Event() multiprocessing_manager = multiprocessing.Manager() restarted_tests = multiprocessing_manager.list() - # Move to a new process group and kill it at exit so that we don't have any - # infinite tests processes left - # (new process group is required to avoid killing some parent processes) - os.setpgid(0, 0) signal.signal(signal.SIGTERM, signal_handler) signal.signal(signal.SIGINT, signal_handler) signal.signal(signal.SIGHUP, signal_handler) @@ -3429,7 +3566,9 @@ if __name__ == "__main__": f"Cannot access the specified directory with queries ({args.queries})", file=sys.stderr, ) - sys.exit(1) + assert False, "No --queries provided" + + CAPTURE_CLIENT_STACKTRACE = args.capture_client_stacktrace # Autodetect the directory with queries if not specified if args.queries is None: @@ -3552,4 +3691,14 @@ if __name__ == "__main__": if args.replace_replicated_with_shared: args.s3_storage = True - main(args) + try: + main(args) + except ServerDied as e: + print(f"{e}", file=sys.stderr) + sys.exit(1) + except Terminated as e: + print(f"Terminated with {e.signal} signal", file=sys.stderr) + sys.exit(128 + e.signal) + except KeyboardInterrupt: + print("Interrupted") + sys.exit(128 + signal.SIGINT) diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml index e106e3a0e6b..091071f0637 100644 --- a/tests/config/config.d/storage_conf.xml +++ b/tests/config/config.d/storage_conf.xml @@ -27,6 +27,7 @@ 0.30.150.15 + 0 cache @@ -37,6 +38,7 @@ 100 0 0 + 0 diff --git a/tests/config/config.d/storage_conf_02944.xml b/tests/config/config.d/storage_conf_02944.xml index 5f45640a923..08d78900229 100644 --- a/tests/config/config.d/storage_conf_02944.xml +++ b/tests/config/config.d/storage_conf_02944.xml @@ -19,6 +19,7 @@ 10 100 0 + 0 diff --git a/tests/config/config.d/transactions.xml b/tests/config/config.d/transactions.xml index 9948b1f1865..64e166b81b5 100644 --- a/tests/config/config.d/transactions.xml +++ b/tests/config/config.d/transactions.xml @@ -1,4 +1,4 @@ - + 42 @@ -18,4 +18,4 @@ 0.01 - + diff --git a/tests/config/install.sh b/tests/config/install.sh index 7c4b36dc4bd..fda74bd7a8d 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -94,6 +94,7 @@ ln -sf $SRC_PATH/users.d/prefetch_settings.xml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/users.d/nonconst_timezone.xml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/users.d/allow_introspection_functions.yaml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/users.d/replicated_ddl_entry.xml $DEST_SERVER_PATH/users.d/ +ln -sf $SRC_PATH/users.d/limits.yaml $DEST_SERVER_PATH/users.d/ if [[ -n "$USE_OLD_ANALYZER" ]] && [[ "$USE_OLD_ANALYZER" -eq 1 ]]; then ln -sf $SRC_PATH/users.d/analyzer.xml $DEST_SERVER_PATH/users.d/ diff --git a/tests/config/users.d/limits.yaml b/tests/config/users.d/limits.yaml new file mode 100644 index 00000000000..53cbbfa744a --- /dev/null +++ b/tests/config/users.d/limits.yaml @@ -0,0 +1,56 @@ +profiles: + default: + max_memory_usage: 5G + max_rows_to_read: 20000000 + + # Also set every other limit to a high value, so it will not limit anything, but we will test that code around it. + s3_max_get_rps: 1000000 + s3_max_get_burst: 2000000 + s3_max_put_rps: 1000000 + s3_max_put_burst: 2000000 + max_remote_read_network_bandwidth: 1T + max_remote_write_network_bandwidth: 1T + max_local_read_bandwidth: 1T + max_local_write_bandwidth: 1T + use_index_for_in_with_subqueries_max_values: 1G + max_bytes_to_read: 1T + max_bytes_to_read_leaf: 1T + max_rows_to_group_by: 10G + max_bytes_before_external_group_by: 10G + max_rows_to_sort: 10G + max_bytes_to_sort: 10G + max_bytes_before_external_sort: 10G + max_result_rows: 1G + max_result_bytes: 1G + max_execution_time: 600 + max_execution_time_leaf: 600 + max_execution_speed: 100G + max_execution_speed_bytes: 10T + timeout_before_checking_execution_speed: 300 + max_estimated_execution_time: 600 + max_columns_to_read: 20K + max_temporary_columns: 20K + max_temporary_non_const_columns: 20K + max_rows_in_set: 10G + max_bytes_in_set: 10G + max_rows_in_join: 10G + max_bytes_in_join: 10G + max_rows_to_transfer: 1G + max_bytes_to_transfer: 1G + max_rows_in_distinct: 10G + max_bytes_in_distinct: 10G + max_memory_usage_for_user: 32G + max_network_bandwidth: 100G + max_network_bytes: 1T + max_network_bandwidth_for_user: 100G + max_network_bandwidth_for_all_users: 100G + max_temporary_data_on_disk_size_for_user: 100G + max_temporary_data_on_disk_size_for_query: 100G + max_backup_bandwidth: 100G + max_hyperscan_regexp_length: 1M + max_hyperscan_regexp_total_length: 10M + query_cache_max_size_in_bytes: 10M + query_cache_max_entries: 100K + external_storage_max_read_rows: 10G + external_storage_max_read_bytes: 10G + max_streams_for_merge_tree_reading: 1000 diff --git a/docker/test/stateless/attach_gdb.lib b/tests/docker_scripts/attach_gdb.lib similarity index 98% rename from docker/test/stateless/attach_gdb.lib rename to tests/docker_scripts/attach_gdb.lib index 2f1375a2f0f..4170a19176c 100644 --- a/docker/test/stateless/attach_gdb.lib +++ b/tests/docker_scripts/attach_gdb.lib @@ -1,7 +1,7 @@ #!/bin/bash # shellcheck source=./utils.lib -source /utils.lib +source /repo/tests/docker_scripts/utils.lib function attach_gdb_to_clickhouse() { diff --git a/docker/test/stateful/create.sql b/tests/docker_scripts/create.sql similarity index 100% rename from docker/test/stateful/create.sql rename to tests/docker_scripts/create.sql diff --git a/docker/test/fasttest/run.sh b/tests/docker_scripts/fasttest_runner.sh similarity index 95% rename from docker/test/fasttest/run.sh rename to tests/docker_scripts/fasttest_runner.sh index 394d31addb1..1eaba2c7cdf 100755 --- a/docker/test/fasttest/run.sh +++ b/tests/docker_scripts/fasttest_runner.sh @@ -256,22 +256,6 @@ function configure rm -f "$FASTTEST_DATA/config.d/secure_ports.xml" } -function timeout_with_logging() { - local exit_code=0 - - timeout -s TERM --preserve-status "${@}" || exit_code="${?}" - - echo "Checking if it is a timeout. The code 124 will indicate a timeout." - if [[ "${exit_code}" -eq "124" ]] - then - echo "The command 'timeout ${*}' has been killed by timeout." - else - echo "No, it isn't a timeout." - fi - - return $exit_code -} - function run_tests { clickhouse-server --version @@ -340,8 +324,8 @@ case "$stage" in configure 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/install_log.txt" ;& "run_tests") - timeout_with_logging 35m bash -c run_tests ||: - /process_functional_tests_result.py --in-results-dir "$FASTTEST_OUTPUT/" \ + run_tests ||: + /repo/tests/docker_scripts/process_functional_tests_result.py --in-results-dir "$FASTTEST_OUTPUT/" \ --out-results-file "$FASTTEST_OUTPUT/test_results.tsv" \ --out-status-file "$FASTTEST_OUTPUT/check_status.tsv" || echo -e "failure\tCannot parse results" > "$FASTTEST_OUTPUT/check_status.tsv" ;; diff --git a/docker/test/util/process_functional_tests_result.py b/tests/docker_scripts/process_functional_tests_result.py similarity index 92% rename from docker/test/util/process_functional_tests_result.py rename to tests/docker_scripts/process_functional_tests_result.py index aa2ea686c46..1dc3090484c 100755 --- a/docker/test/util/process_functional_tests_result.py +++ b/tests/docker_scripts/process_functional_tests_result.py @@ -32,7 +32,7 @@ def process_test_log(log_path, broken_tests): success_finish = False test_results = [] test_end = True - with open(log_path, "r") as test_file: + with open(log_path, "r", encoding="utf-8") as test_file: for line in test_file: original_line = line line = line.strip() @@ -116,7 +116,7 @@ def process_test_log(log_path, broken_tests): test[0], test[1], test[2], - "".join(test[3])[:4096].replace("\t", "\\t").replace("\n", "\\n"), + "".join(test[3])[:8192].replace("\t", "\\t").replace("\n", "\\n"), ] for test in test_results ] @@ -150,7 +150,7 @@ def process_result(result_path, broken_tests): if result_path and os.path.exists(result_path): ( - total, + _total, skipped, unknown, failed, @@ -191,11 +191,11 @@ def process_result(result_path, broken_tests): else: description = "" - description += "fail: {}, passed: {}".format(failed, success) + description += f"fail: {failed}, passed: {success}" if skipped != 0: - description += ", skipped: {}".format(skipped) + description += f", skipped: {skipped}" if unknown != 0: - description += ", unknown: {}".format(unknown) + description += f", unknown: {unknown}" else: state = "failure" description = "Output log doesn't exist" @@ -205,10 +205,10 @@ def process_result(result_path, broken_tests): def write_results(results_file, status_file, results, status): - with open(results_file, "w") as f: + with open(results_file, "w", encoding="utf-8") as f: out = csv.writer(f, delimiter="\t") out.writerows(results) - with open(status_file, "w") as f: + with open(status_file, "w", encoding="utf-8") as f: out = csv.writer(f, delimiter="\t") out.writerow(status) @@ -221,15 +221,15 @@ if __name__ == "__main__": parser.add_argument("--in-results-dir", default="/test_output/") parser.add_argument("--out-results-file", default="/test_output/test_results.tsv") parser.add_argument("--out-status-file", default="/test_output/check_status.tsv") - parser.add_argument("--broken-tests", default="/analyzer_tech_debt.txt") + parser.add_argument("--broken-tests", default="/repo/tests/analyzer_tech_debt.txt") args = parser.parse_args() - broken_tests = list() + broken_tests = [] if os.path.exists(args.broken_tests): - logging.info(f"File {args.broken_tests} with broken tests found") - with open(args.broken_tests) as f: + print(f"File {args.broken_tests} with broken tests found") + with open(args.broken_tests, encoding="utf-8") as f: broken_tests = f.read().splitlines() - logging.info(f"Broken tests in the list: {len(broken_tests)}") + print(f"Broken tests in the list: {len(broken_tests)}") state, description, test_results = process_result(args.in_results_dir, broken_tests) logging.info("Result parsed") diff --git a/docker/test/stateless/setup_hdfs_minicluster.sh b/tests/docker_scripts/setup_hdfs_minicluster.sh similarity index 95% rename from docker/test/stateless/setup_hdfs_minicluster.sh rename to tests/docker_scripts/setup_hdfs_minicluster.sh index 15a54f59096..622270ba5d5 100755 --- a/docker/test/stateless/setup_hdfs_minicluster.sh +++ b/tests/docker_scripts/setup_hdfs_minicluster.sh @@ -5,7 +5,7 @@ set -e -x -a -u ls -lha -cd hadoop-3.3.1 +cd /hadoop-3.3.1 export JAVA_HOME=/usr mkdir -p target/test/data diff --git a/docker/test/stateless/setup_minio.sh b/tests/docker_scripts/setup_minio.sh similarity index 87% rename from docker/test/stateless/setup_minio.sh rename to tests/docker_scripts/setup_minio.sh index 2b9433edd20..40e93e713a1 100755 --- a/docker/test/stateless/setup_minio.sh +++ b/tests/docker_scripts/setup_minio.sh @@ -59,8 +59,8 @@ find_os() { download_minio() { local os local arch - local minio_server_version=${MINIO_SERVER_VERSION:-2022-09-07T22-25-02Z} - local minio_client_version=${MINIO_CLIENT_VERSION:-2022-08-28T20-08-11Z} + local minio_server_version=${MINIO_SERVER_VERSION:-2024-08-03T04-33-23Z} + local minio_client_version=${MINIO_CLIENT_VERSION:-2024-07-31T15-58-33Z} os=$(find_os) arch=$(find_arch) @@ -82,10 +82,10 @@ setup_minio() { local test_type=$1 ./mc alias set clickminio http://localhost:11111 clickhouse clickhouse ./mc admin user add clickminio test testtest - ./mc admin policy set clickminio readwrite user=test + ./mc admin policy attach clickminio readwrite --user=test ./mc mb --ignore-existing clickminio/test if [ "$test_type" = "stateless" ]; then - ./mc policy set public clickminio/test + ./mc anonymous set public clickminio/test fi } @@ -99,10 +99,9 @@ upload_data() { # iterating over globs will cause redundant file variable to be # a path to a file, not a filename # shellcheck disable=SC2045 - for file in $(ls "${data_path}"); do - echo "${file}"; - ./mc cp "${data_path}"/"${file}" clickminio/test/"${file}"; - done + if [ -d "${data_path}" ]; then + ./mc cp --recursive "${data_path}"/ clickminio/test/ + fi } setup_aws_credentials() { @@ -144,8 +143,8 @@ main() { fi start_minio setup_minio "$1" - upload_data "${query_dir}" "${2:-/usr/share/clickhouse-test}" + upload_data "${query_dir}" "${2:-/repo/tests/}" setup_aws_credentials } -main "$@" \ No newline at end of file +main "$@" diff --git a/docker/test/sqllogic/run.sh b/tests/docker_scripts/sqllogic_runner.sh similarity index 85% rename from docker/test/sqllogic/run.sh rename to tests/docker_scripts/sqllogic_runner.sh index ccba344035e..8b8f1e7aec7 100755 --- a/docker/test/sqllogic/run.sh +++ b/tests/docker_scripts/sqllogic_runner.sh @@ -15,10 +15,10 @@ echo "Files in current directory" ls -la ./ echo "Files in root directory" ls -la / -echo "Files in /clickhouse-tests directory" -ls -la /clickhouse-tests -echo "Files in /clickhouse-tests/sqllogic directory" -ls -la /clickhouse-tests/sqllogic +echo "Files in /repo/tests directory" +ls -la /repo/tests +echo "Files in /repo/tests/sqllogic directory" +ls -la /repo/tests/sqllogic echo "Files in /package_folder directory" ls -la /package_folder echo "Files in /test_output" @@ -45,13 +45,13 @@ function run_tests() cd /test_output - /clickhouse-tests/sqllogic/runner.py --help 2>&1 \ + /repo/tests/sqllogic/runner.py --help 2>&1 \ | ts '%Y-%m-%d %H:%M:%S' mkdir -p /test_output/self-test - /clickhouse-tests/sqllogic/runner.py --log-file /test_output/runner-self-test.log \ + /repo/tests/sqllogic/runner.py --log-file /test_output/runner-self-test.log \ self-test \ - --self-test-dir /clickhouse-tests/sqllogic/self-test \ + --self-test-dir /repo/tests/sqllogic/self-test \ --out-dir /test_output/self-test \ 2>&1 \ | ts '%Y-%m-%d %H:%M:%S' @@ -63,7 +63,7 @@ function run_tests() if [ -d /sqllogictest ] then mkdir -p /test_output/statements-test - /clickhouse-tests/sqllogic/runner.py \ + /repo/tests/sqllogic/runner.py \ --log-file /test_output/runner-statements-test.log \ --log-level info \ statements-test \ @@ -77,7 +77,7 @@ function run_tests() tar -zcvf statements-check.tar.gz statements-test 1>/dev/null mkdir -p /test_output/complete-test - /clickhouse-tests/sqllogic/runner.py \ + /repo/tests/sqllogic/runner.py \ --log-file /test_output/runner-complete-test.log \ --log-level info \ complete-test \ @@ -94,7 +94,7 @@ function run_tests() export -f run_tests -timeout "${MAX_RUN_TIME:-9000}" bash -c run_tests || echo "timeout reached" >&2 +run_tests #/process_functional_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv diff --git a/docker/test/stateful/run.sh b/tests/docker_scripts/stateful_runner.sh similarity index 88% rename from docker/test/stateful/run.sh rename to tests/docker_scripts/stateful_runner.sh index 8e2f1890f89..86f6a299ad3 100755 --- a/docker/test/stateful/run.sh +++ b/tests/docker_scripts/stateful_runner.sh @@ -4,9 +4,6 @@ source /setup_export_logs.sh set -e -x -MAX_RUN_TIME=${MAX_RUN_TIME:-3600} -MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 3600 : MAX_RUN_TIME)) - # Choose random timezone for this test run TZ="$(rg -v '#' /usr/share/zoneinfo/zone.tab | awk '{print $3}' | shuf | head -n1)" echo "Choosen random timezone $TZ" @@ -17,17 +14,17 @@ dpkg -i package_folder/clickhouse-common-static-dbg_*.deb dpkg -i package_folder/clickhouse-server_*.deb dpkg -i package_folder/clickhouse-client_*.deb -ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test +ln -s /repo/tests/clickhouse-test /usr/bin/clickhouse-test # shellcheck disable=SC1091 -source /utils.lib +source /repo/tests/docker_scripts/utils.lib # install test configs -/usr/share/clickhouse-test/config/install.sh +/repo/tests/config/install.sh azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --silent --inMemoryPersistence & -./setup_minio.sh stateful +/repo/tests/docker_scripts/setup_minio.sh stateful ./mc admin trace clickminio > /test_output/minio.log & MC_ADMIN_PID=$! @@ -108,7 +105,7 @@ setup_logs_replication clickhouse-client --query "SHOW DATABASES" clickhouse-client --query "CREATE DATABASE datasets" -clickhouse-client --multiquery < create.sql +clickhouse-client --multiquery < /repo/tests/docker_scripts/create.sql clickhouse-client --query "SHOW TABLES FROM datasets" if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then @@ -118,14 +115,11 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] clickhouse-client --query "CREATE TABLE test.hits AS datasets.hits_v1" clickhouse-client --query "CREATE TABLE test.visits AS datasets.visits_v1" - clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1" - clickhouse-client --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1" + clickhouse-client --max_memory_usage 10G --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1" + clickhouse-client --max_memory_usage 10G --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1" clickhouse-client --query "DROP TABLE datasets.hits_v1" clickhouse-client --query "DROP TABLE datasets.visits_v1" - - MAX_RUN_TIME=$((MAX_RUN_TIME < 9000 ? MAX_RUN_TIME : 9000)) # min(MAX_RUN_TIME, 2.5 hours) - MAX_RUN_TIME=$((MAX_RUN_TIME != 0 ? MAX_RUN_TIME : 9000)) # set to 2.5 hours if 0 (unlimited) else clickhouse-client --query "CREATE DATABASE test" clickhouse-client --query "SHOW TABLES FROM test" @@ -191,8 +185,8 @@ else ENGINE = CollapsingMergeTree(Sign) PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" - clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16" - clickhouse-client --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16" + clickhouse-client --max_memory_usage 10G --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16" + clickhouse-client --max_memory_usage 10G --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16" clickhouse-client --query "DROP TABLE datasets.visits_v1 SYNC" clickhouse-client --query "DROP TABLE datasets.hits_v1 SYNC" else @@ -200,7 +194,8 @@ else clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits" fi clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" - clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16" + # AWS S3 is very inefficient, so increase memory even further: + clickhouse-client --max_memory_usage 30G --max_memory_usage_for_user 30G --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16" fi clickhouse-client --query "SHOW TABLES FROM test" @@ -232,45 +227,40 @@ function run_tests() set +e + TEST_ARGS=( + -j 2 + --testname + --shard + --zookeeper + --check-zookeeper-session + --no-stateless + --hung-check + --print-time + --capture-client-stacktrace + --queries "/repo/tests/queries" + "${ADDITIONAL_OPTIONS[@]}" + "$SKIP_TESTS_OPTION" + ) if [[ -n "$USE_PARALLEL_REPLICAS" ]] && [[ "$USE_PARALLEL_REPLICAS" -eq 1 ]]; then - clickhouse-test --client="clickhouse-client --allow_experimental_parallel_reading_from_replicas=1 --parallel_replicas_for_non_replicated_merge_tree=1 \ - --max_parallel_replicas=100 --cluster_for_parallel_replicas='parallel_replicas'" \ - -j 2 --testname --shard --zookeeper --check-zookeeper-session --no-stateless --no-parallel-replicas --hung-check --print-time "${ADDITIONAL_OPTIONS[@]}" \ - "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt - else - clickhouse-test -j 2 --testname --shard --zookeeper --check-zookeeper-session --no-stateless --hung-check --print-time "${ADDITIONAL_OPTIONS[@]}" \ - "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt + TEST_ARGS+=( + --client="clickhouse-client --allow_experimental_parallel_reading_from_replicas=1 --parallel_replicas_for_non_replicated_merge_tree=1 --max_parallel_replicas=100 --cluster_for_parallel_replicas='parallel_replicas'" + --no-parallel-replicas + ) fi + clickhouse-test "${TEST_ARGS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt set -e } export -f run_tests -function timeout_with_logging() { - local exit_code=0 - - timeout -s TERM --preserve-status "${@}" || exit_code="${?}" - - echo "Checking if it is a timeout. The code 124 will indicate a timeout." - if [[ "${exit_code}" -eq "124" ]] - then - echo "The command 'timeout ${*}' has been killed by timeout." - else - echo "No, it isn't a timeout." - fi - - return $exit_code -} - -TIMEOUT=$((MAX_RUN_TIME - 700)) -timeout_with_logging "$TIMEOUT" bash -c run_tests ||: +run_tests ||: echo "Files in current directory" ls -la ./ echo "Files in root directory" ls -la / -/process_functional_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv +/repo/tests/docker_scripts/process_functional_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv sudo clickhouse stop ||: if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then diff --git a/docker/test/stateless/run.sh b/tests/docker_scripts/stateless_runner.sh similarity index 82% rename from docker/test/stateless/run.sh rename to tests/docker_scripts/stateless_runner.sh index 788bddd811d..d8921a04458 100755 --- a/docker/test/stateless/run.sh +++ b/tests/docker_scripts/stateless_runner.sh @@ -1,10 +1,13 @@ #!/bin/bash +# fail on errors, verbose and export all env variables +set -e -x -a + # shellcheck disable=SC1091 source /setup_export_logs.sh # shellcheck source=../stateless/stress_tests.lib -source /stress_tests.lib +source /repo/tests/docker_scripts/stress_tests.lib # Avoid overlaps with previous runs dmesg --clear @@ -12,9 +15,6 @@ dmesg --clear # fail on errors, verbose and export all env variables set -e -x -a -MAX_RUN_TIME=${MAX_RUN_TIME:-9000} -MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 9000 : MAX_RUN_TIME)) - USE_DATABASE_REPLICATED=${USE_DATABASE_REPLICATED:=0} USE_SHARED_CATALOG=${USE_SHARED_CATALOG:=0} @@ -42,22 +42,22 @@ if [[ -z "$BUGFIX_VALIDATE_CHECK" ]]; then chc --version || exit 1 fi -ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test +ln -sf /repo/tests/clickhouse-test /usr/bin/clickhouse-test + +export CLICKHOUSE_GRPC_CLIENT="/repo/utils/grpc-client/clickhouse-grpc-client.py" # shellcheck disable=SC1091 -source /attach_gdb.lib +source /repo/tests/docker_scripts/attach_gdb.lib # shellcheck disable=SC1091 -source /utils.lib +source /repo/tests/docker_scripts/utils.lib # install test configs -/usr/share/clickhouse-test/config/install.sh +/repo/tests/config/install.sh -./setup_minio.sh stateless -./mc admin trace clickminio > /test_output/minio.log & -MC_ADMIN_PID=$! +/repo/tests/docker_scripts/setup_minio.sh stateless -./setup_hdfs_minicluster.sh +/repo/tests/docker_scripts/setup_hdfs_minicluster.sh config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml @@ -80,6 +80,9 @@ fi export IS_FLAKY_CHECK=0 +# Export NUM_TRIES so python scripts will see its value as env variable +export NUM_TRIES + # For flaky check we also enable thread fuzzer if [ "$NUM_TRIES" -gt "1" ]; then export IS_FLAKY_CHECK=1 @@ -176,6 +179,55 @@ done setup_logs_replication attach_gdb_to_clickhouse +# create tables for minio log webhooks +clickhouse-client --query "CREATE TABLE minio_audit_logs +( + log String, + event_time DateTime64(9) MATERIALIZED parseDateTime64BestEffortOrZero(trim(BOTH '\"' FROM JSONExtractRaw(log, 'time')), 9, 'UTC') +) +ENGINE = MergeTree +ORDER BY tuple()" + +clickhouse-client --query "CREATE TABLE minio_server_logs +( + log String, + event_time DateTime64(9) MATERIALIZED parseDateTime64BestEffortOrZero(trim(BOTH '\"' FROM JSONExtractRaw(log, 'time')), 9, 'UTC') +) +ENGINE = MergeTree +ORDER BY tuple()" + +# create minio log webhooks for both audit and server logs +# use async inserts to avoid creating too many parts +./mc admin config set clickminio logger_webhook:ch_server_webhook endpoint="http://localhost:8123/?async_insert=1&wait_for_async_insert=0&async_insert_busy_timeout_min_ms=5000&async_insert_busy_timeout_max_ms=5000&async_insert_max_query_number=1000&async_insert_max_data_size=10485760&query=INSERT%20INTO%20minio_server_logs%20FORMAT%20LineAsString" queue_size=1000000 batch_size=500 +./mc admin config set clickminio audit_webhook:ch_audit_webhook endpoint="http://localhost:8123/?async_insert=1&wait_for_async_insert=0&async_insert_busy_timeout_min_ms=5000&async_insert_busy_timeout_max_ms=5000&async_insert_max_query_number=1000&async_insert_max_data_size=10485760&query=INSERT%20INTO%20minio_audit_logs%20FORMAT%20LineAsString" queue_size=1000000 batch_size=500 + +max_retries=100 +retry=1 +while [ $retry -le $max_retries ]; do + echo "clickminio restart attempt $retry:" + + output=$(./mc admin service restart clickminio --wait --json 2>&1 | jq -r .status) + echo "Output of restart status: $output" + + expected_output="success +success" + if [ "$output" = "$expected_output" ]; then + echo "Restarted clickminio successfully." + break + fi + + sleep 1 + + retry=$((retry + 1)) +done + +if [ $retry -gt $max_retries ]; then + echo "Failed to restart clickminio after $max_retries attempts." +fi + +./mc admin trace clickminio > /test_output/minio.log & +MC_ADMIN_PID=$! + function fn_exists() { declare -F "$1" > /dev/null; } @@ -261,44 +313,44 @@ function run_tests() try_run_with_retry 10 clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')" - TIMEOUT=$((MAX_RUN_TIME - 800 > 8400 ? 8400 : MAX_RUN_TIME - 800)) - START_TIME=${SECONDS} set +e - timeout --preserve-status --signal TERM --kill-after 60m ${TIMEOUT}s \ - clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ - --no-drop-if-fail --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \ - | ts '%Y-%m-%d %H:%M:%S' \ - | tee -a test_output/test_result.txt - set -e - DURATION=$((SECONDS - START_TIME)) - echo "Elapsed ${DURATION} seconds." - if [[ $DURATION -ge $TIMEOUT ]] - then - echo "It looks like the command is terminated by the timeout, which is ${TIMEOUT} seconds." - fi + TEST_ARGS=( + --testname + --shard + --zookeeper + --check-zookeeper-session + --hung-check + --print-time + --no-drop-if-fail + --capture-client-stacktrace + --queries "/repo/tests/queries" + --test-runs "$NUM_TRIES" + "${ADDITIONAL_OPTIONS[@]}" + ) + clickhouse-test "${TEST_ARGS[@]}" 2>&1 \ + | ts '%Y-%m-%d %H:%M:%S' \ + | tee -a test_output/test_result.txt + set -e } export -f run_tests - -# This should be enough to setup job and collect artifacts -TIMEOUT=$((MAX_RUN_TIME - 700)) if [ "$NUM_TRIES" -gt "1" ]; then # We don't run tests with Ordinary database in PRs, only in master. # So run new/changed tests with Ordinary at least once in flaky check. - timeout_with_logging "$TIMEOUT" bash -c 'NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests' \ + NUM_TRIES=1 USE_DATABASE_ORDINARY=1 run_tests \ | sed 's/All tests have finished/Redacted: a message about tests finish is deleted/' | sed 's/No tests were run/Redacted: a message about no tests run is deleted/' ||: fi -timeout_with_logging "$TIMEOUT" bash -c run_tests ||: +run_tests ||: echo "Files in current directory" ls -la ./ echo "Files in root directory" ls -la / -/process_functional_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv +/repo/tests/docker_scripts/process_functional_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv clickhouse-client -q "system flush logs" ||: @@ -328,6 +380,14 @@ do fi done + +# collect minio audit and server logs +# wait for minio to flush its batch if it has any +sleep 1 +clickhouse-client -q "SYSTEM FLUSH ASYNC INSERT QUEUE" +clickhouse-client --max_block_size 8192 --max_memory_usage 10G --max_threads 1 --max_result_bytes 0 --max_result_rows 0 --max_rows_to_read 0 --max_bytes_to_read 0 -q "SELECT log FROM minio_audit_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_audit_logs.jsonl.zst' FORMAT JSONEachRow" +clickhouse-client --max_block_size 8192 --max_memory_usage 10G --max_threads 1 --max_result_bytes 0 --max_result_rows 0 --max_rows_to_read 0 --max_bytes_to_read 0 -q "SELECT log FROM minio_server_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_server_logs.jsonl.zst' FORMAT JSONEachRow" + # Stop server so we can safely read data with clickhouse-local. # Why do we read data with clickhouse-local? # Because it's the simplest way to read it when server has crashed. diff --git a/docker/test/stress/run.sh b/tests/docker_scripts/stress_runner.sh old mode 100644 new mode 100755 similarity index 97% rename from docker/test/stress/run.sh rename to tests/docker_scripts/stress_runner.sh index b21114e456f..039c60c8e4e --- a/docker/test/stress/run.sh +++ b/tests/docker_scripts/stress_runner.sh @@ -3,26 +3,24 @@ # shellcheck disable=SC2086 # shellcheck disable=SC2024 +set -x + # Avoid overlaps with previous runs dmesg --clear # shellcheck disable=SC1091 source /setup_export_logs.sh -set -x - -# we mount tests folder from repo to /usr/share -ln -s /usr/share/clickhouse-test/ci/stress.py /usr/bin/stress -ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test +ln -s /repo/tests/clickhouse-test /usr/bin/clickhouse-test # Stress tests and upgrade check uses similar code that was placed # in a separate bash library. See tests/ci/stress_tests.lib # shellcheck source=../stateless/attach_gdb.lib -source /attach_gdb.lib +source /repo/tests/docker_scripts/attach_gdb.lib # shellcheck source=../stateless/stress_tests.lib -source /stress_tests.lib +source /repo/tests/docker_scripts/stress_tests.lib # shellcheck disable=SC1091 -source /utils.lib +source /repo/tests/docker_scripts/utils.lib install_packages package_folder @@ -55,7 +53,7 @@ export ZOOKEEPER_FAULT_INJECTION=1 # available for dump via clickhouse-local configure -./setup_minio.sh stateless # to have a proper environment +/repo/tests/docker_scripts/setup_minio.sh stateless # to have a proper environment config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml @@ -64,7 +62,7 @@ start_server setup_logs_replication clickhouse-client --query "CREATE DATABASE datasets" -clickhouse-client --multiquery < create.sql +clickhouse-client --multiquery < /repo/tests/docker_scripts/create.sql clickhouse-client --query "SHOW TABLES FROM datasets" clickhouse-client --query "CREATE DATABASE IF NOT EXISTS test" @@ -267,7 +265,8 @@ fi start_server -stress --hung-check --drop-databases --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" --global-time-limit 1200 \ +cd /repo/tests/ || exit 1 # clickhouse-test can find queries dir from there +python3 /repo/tests/ci/stress.py --hung-check --drop-databases --output-folder /test_output --skip-func-tests "$SKIP_TESTS_OPTION" --global-time-limit 1200 \ && echo -e "Test script exit code$OK" >> /test_output/test_results.tsv \ || echo -e "Test script failed$FAIL script exit code: $?" >> /test_output/test_results.tsv diff --git a/docker/test/stateless/stress_tests.lib b/tests/docker_scripts/stress_tests.lib similarity index 98% rename from docker/test/stateless/stress_tests.lib rename to tests/docker_scripts/stress_tests.lib index 51aa299f7a6..4f3e6eeb2f4 100644 --- a/docker/test/stateless/stress_tests.lib +++ b/tests/docker_scripts/stress_tests.lib @@ -42,7 +42,7 @@ function configure() # install test configs export USE_DATABASE_ORDINARY=1 export EXPORT_S3_STORAGE_POLICIES=1 - /usr/share/clickhouse-test/config/install.sh + /repo/tests/config/install.sh # avoid too slow startup sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \ @@ -273,7 +273,7 @@ function check_logs_for_critical_errors() [ -s /test_output/no_such_key_errors.txt ] || rm /test_output/no_such_key_errors.txt # Crash - rg -Fa "########################################" /var/log/clickhouse-server/clickhouse-server*.log > /dev/null \ + rg -Fa "###################""#####################" /var/log/clickhouse-server/clickhouse-server*.log > /dev/null \ && echo -e "Killed by signal (in clickhouse-server.log)$FAIL" >> /test_output/test_results.tsv \ || echo -e "Not crashed$OK" >> /test_output/test_results.tsv @@ -285,7 +285,7 @@ function check_logs_for_critical_errors() # Remove file fatal_messages.txt if it's empty [ -s /test_output/fatal_messages.txt ] || rm /test_output/fatal_messages.txt - rg -Faz "########################################" /test_output/* > /dev/null \ + rg -Faz "####################""####################" /test_output/* > /dev/null \ && echo -e "Killed by signal (output files)$FAIL" >> /test_output/test_results.tsv function get_gdb_log_context() diff --git a/docker/test/upgrade/run.sh b/tests/docker_scripts/upgrade_runner.sh old mode 100644 new mode 100755 similarity index 93% rename from docker/test/upgrade/run.sh rename to tests/docker_scripts/upgrade_runner.sh index a4c4c75e5b3..ece75ebf782 --- a/docker/test/upgrade/run.sh +++ b/tests/docker_scripts/upgrade_runner.sh @@ -9,20 +9,20 @@ dmesg --clear set -x # we mount tests folder from repo to /usr/share -ln -s /usr/share/clickhouse-test/ci/stress.py /usr/bin/stress -ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test -ln -s /usr/share/clickhouse-test/ci/download_release_packages.py /usr/bin/download_release_packages -ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_previous_release_tag +ln -s /repo/tests/ci/stress.py /usr/bin/stress +ln -s /repo/tests/clickhouse-test /usr/bin/clickhouse-test +ln -s /repo/tests/ci/download_release_packages.py /usr/bin/download_release_packages +ln -s /repo/tests/ci/get_previous_release_tag.py /usr/bin/get_previous_release_tag # Stress tests and upgrade check uses similar code that was placed # in a separate bash library. See tests/ci/stress_tests.lib # shellcheck source=../stateless/attach_gdb.lib -source /attach_gdb.lib +source /repo/tests/docker_scripts/attach_gdb.lib # shellcheck source=../stateless/stress_tests.lib -source /stress_tests.lib +source /repo/tests/docker_scripts/stress_tests.lib azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log & -./setup_minio.sh stateless # to have a proper environment +/repo/tests/docker_scripts/setup_minio.sh stateless # to have a proper environment echo "Get previous release tag" # shellcheck disable=SC2016 @@ -129,6 +129,7 @@ configure # Check that all new/changed setting were added in settings changes history. # Some settings can be different for builds with sanitizers, so we check +# Also the automatic value of 'max_threads' and similar was displayed as "'auto(...)'" in previous versions instead of "auto(...)". # settings changes only for non-sanitizer builds. IS_SANITIZED=$(clickhouse-local --query "SELECT value LIKE '%-fsanitize=%' FROM system.build_options WHERE name = 'CXX_FLAGS'") if [ "${IS_SANITIZED}" -eq "0" ] @@ -145,7 +146,9 @@ then old_settings.value AS old_value FROM new_settings LEFT JOIN old_settings ON new_settings.name = old_settings.name - WHERE (new_settings.value != old_settings.value) AND (name NOT IN ( + WHERE (new_value != old_value) + AND NOT (startsWith(new_value, 'auto(') AND old_value LIKE '%auto(%') + AND (name NOT IN ( SELECT arrayJoin(tupleElement(changes, 'name')) FROM ( @@ -177,7 +180,7 @@ then if [ -s changed_settings.txt ] then mv changed_settings.txt /test_output/ - echo -e "Changed settings are not reflected in settings changes history (see changed_settings.txt)$FAIL$(head_escaped /test_output/changed_settings.txt)" >> /test_output/test_results.tsv + echo -e "Changed settings are not reflected in the settings changes history (see changed_settings.txt)$FAIL$(head_escaped /test_output/changed_settings.txt)" >> /test_output/test_results.tsv else echo -e "There are no changed settings or they are reflected in settings changes history$OK" >> /test_output/test_results.tsv fi diff --git a/docker/test/stateless/utils.lib b/tests/docker_scripts/utils.lib similarity index 69% rename from docker/test/stateless/utils.lib rename to tests/docker_scripts/utils.lib index cb257536c36..31cd67254b4 100644 --- a/docker/test/stateless/utils.lib +++ b/tests/docker_scripts/utils.lib @@ -40,22 +40,6 @@ function fn_exists() { declare -F "$1" > /dev/null; } -function timeout_with_logging() { - local exit_code=0 - - timeout -s TERM --preserve-status "${@}" || exit_code="${?}" - - echo "Checking if it is a timeout. The code 124 will indicate a timeout." - if [[ "${exit_code}" -eq "124" ]] - then - echo "The command 'timeout ${*}' has been killed by timeout." - else - echo "No, it isn't a timeout." - fi - - return $exit_code -} - function collect_core_dumps() { find . -type f -maxdepth 1 -name 'core.*' | while read -r core; do diff --git a/tests/fuzz/README.md b/tests/fuzz/README.md deleted file mode 100644 index 6b5b161b2d5..00000000000 --- a/tests/fuzz/README.md +++ /dev/null @@ -1,23 +0,0 @@ -The list of functions generated via the following query - -``` - clickhouse client -q "SELECT * FROM (SELECT DISTINCT concat('\"', name, '\"') as res FROM system.functions ORDER BY name UNION ALL SELECT concat('\"', a.name, b.name, '\"') as res FROM system.functions as a CROSS JOIN system.aggregate_function_combinators as b WHERE a.is_aggregate = 1) ORDER BY res" > functions.dict -``` - -The list of datatypes generated via the following query: - -``` - clickhouse client -q "SELECT DISTINCT concat('\"', name, '\"') as res FROM system.data_type_families ORDER BY name" > datatypes.dict -``` - -The list of keywords generated via the following query: - -``` - clickhouse client -q "SELECT DISTINCT concat('\"', keyword, '\"') as res FROM system.keywords ORDER BY keyword" > key_words.dict -``` - -Then merge all dictionaries into one (all.dict) - -``` - cat ./dictionaries/* | sort | uniq > all.dict -``` \ No newline at end of file diff --git a/tests/fuzz/all.dict b/tests/fuzz/all.dict index f08e319f0d4..30af3746fca 100644 --- a/tests/fuzz/all.dict +++ b/tests/fuzz/all.dict @@ -1,38 +1,991 @@ +"ADD COLUMN" +"ADD CONSTRAINT" +"ADD INDEX" +"ADD PROJECTION" +"ADD STATISTICS" +"ADD" +"ADMIN OPTION FOR" +"AFTER" +"ALGORITHM" +"ALIAS" +"ALL" +"ALLOWED_LATENESS" +"ALTER COLUMN" +"ALTER DATABASE" +"ALTER LIVE VIEW" +"ALTER POLICY" +"ALTER PROFILE" +"ALTER QUOTA" +"ALTER ROLE" +"ALTER ROW POLICY" +"ALTER SETTINGS PROFILE" +"ALTER TABLE" +"ALTER TEMPORARY TABLE" +"ALTER USER" +"ALTER" +"AND STDOUT" +"AND" +"ANTI" +"ANY" +"APPEND" +"APPLY DELETED MASK" +"APPLY" +"ARRAY JOIN" +"AS" +"ASC" +"ASCENDING" +"ASOF" +"ASSUME" +"AST" +"ASYNC" +"ATTACH PART" +"ATTACH PARTITION" +"ATTACH POLICY" +"ATTACH PROFILE" +"ATTACH QUOTA" +"ATTACH ROLE" +"ATTACH ROW POLICY" +"ATTACH SETTINGS PROFILE" +"ATTACH USER" +"ATTACH" +"AUTO_INCREMENT" +"AZURE" +"AggregateFunction" +"Array" +"BACKUP" +"BCRYPT_HASH" +"BCRYPT_PASSWORD" +"BEGIN TRANSACTION" +"BETWEEN" +"BIDIRECTIONAL" +"BIGINT SIGNED" +"BIGINT UNSIGNED" +"BIGINT" +"BINARY LARGE OBJECT" +"BINARY VARYING" +"BINARY" +"BIT" +"BIT_AND" +"BIT_ANDArgMax" +"BIT_ANDArgMin" +"BIT_ANDArray" +"BIT_ANDDistinct" +"BIT_ANDForEach" +"BIT_ANDIf" +"BIT_ANDMap" +"BIT_ANDMerge" +"BIT_ANDNull" +"BIT_ANDOrDefault" +"BIT_ANDOrNull" +"BIT_ANDResample" +"BIT_ANDSimpleState" +"BIT_ANDState" +"BIT_OR" +"BIT_ORArgMax" +"BIT_ORArgMin" +"BIT_ORArray" +"BIT_ORDistinct" +"BIT_ORForEach" +"BIT_ORIf" +"BIT_ORMap" +"BIT_ORMerge" +"BIT_ORNull" +"BIT_OROrDefault" +"BIT_OROrNull" +"BIT_ORResample" +"BIT_ORSimpleState" +"BIT_ORState" +"BIT_XOR" +"BIT_XORArgMax" +"BIT_XORArgMin" +"BIT_XORArray" +"BIT_XORDistinct" +"BIT_XORForEach" +"BIT_XORIf" +"BIT_XORMap" +"BIT_XORMerge" +"BIT_XORNull" +"BIT_XOROrDefault" +"BIT_XOROrNull" +"BIT_XORResample" +"BIT_XORSimpleState" +"BIT_XORState" +"BLAKE3" +"BLOB" +"BOTH" +"BY" +"BYTE" +"BYTEA" +"Bool" +"CASCADE" +"CASE" +"CAST" +"CHANGE" +"CHANGEABLE_IN_READONLY" +"CHANGED" +"CHAR LARGE OBJECT" +"CHAR VARYING" +"CHAR" +"CHARACTER LARGE OBJECT" +"CHARACTER VARYING" +"CHARACTER" +"CHARACTER_LENGTH" +"CHAR_LENGTH" +"CHECK ALL TABLES" +"CHECK TABLE" +"CHECK" +"CLEANUP" +"CLEAR COLUMN" +"CLEAR INDEX" +"CLEAR PROJECTION" +"CLEAR STATISTICS" +"CLOB" +"CLUSTER" +"CLUSTERS" +"CN" +"CODEC" +"COLLATE" +"COLUMN" +"COLUMNS" +"COMMENT COLUMN" +"COMMENT" +"COMMIT" +"COMPRESSION" +"CONST" +"CONSTRAINT" +"COVAR_POP" +"COVAR_POPArgMax" +"COVAR_POPArgMin" +"COVAR_POPArray" +"COVAR_POPDistinct" +"COVAR_POPForEach" +"COVAR_POPIf" +"COVAR_POPMap" +"COVAR_POPMerge" +"COVAR_POPNull" +"COVAR_POPOrDefault" +"COVAR_POPOrNull" +"COVAR_POPResample" +"COVAR_POPSimpleState" +"COVAR_POPState" +"COVAR_SAMP" +"COVAR_SAMPArgMax" +"COVAR_SAMPArgMin" +"COVAR_SAMPArray" +"COVAR_SAMPDistinct" +"COVAR_SAMPForEach" +"COVAR_SAMPIf" +"COVAR_SAMPMap" +"COVAR_SAMPMerge" +"COVAR_SAMPNull" +"COVAR_SAMPOrDefault" +"COVAR_SAMPOrNull" +"COVAR_SAMPResample" +"COVAR_SAMPSimpleState" +"COVAR_SAMPState" +"CRC32" +"CRC32IEEE" +"CRC64" +"CREATE POLICY" +"CREATE PROFILE" +"CREATE QUOTA" +"CREATE ROLE" +"CREATE ROW POLICY" +"CREATE SETTINGS PROFILE" +"CREATE TABLE" +"CREATE TEMPORARY TABLE" +"CREATE USER" +"CREATE" +"CROSS" +"CUBE" +"CURRENT GRANTS" +"CURRENT QUOTA" +"CURRENT ROLES" +"CURRENT ROW" +"CURRENT TRANSACTION" +"CURRENTUSER" +"CURRENT_USER" +"D" +"DATA INNER UUID" +"DATA" +"DATABASE" +"DATABASES" +"DATE" +"DATEADD" +"DATEDIFF" +"DATESUB" +"DATE_ADD" +"DATE_DIFF" +"DATE_FORMAT" +"DATE_SUB" +"DATE_TRUNC" +"DAY" +"DAYOFMONTH" +"DAYOFWEEK" +"DAYOFYEAR" +"DAYS" +"DD" +"DEC" +"DEDUPLICATE" +"DEFAULT DATABASE" +"DEFAULT ROLE" +"DEFAULT" +"DEFINER" +"DELETE WHERE" +"DELETE" +"DEPENDS ON" +"DESC" +"DESCENDING" +"DESCRIBE" +"DETACH PART" +"DETACH PARTITION" +"DETACH" +"DICTIONARIES" +"DICTIONARY" +"DISK" +"DISTINCT ON" +"DISTINCT" +"DIV" +"DOUBLE PRECISION" +"DOUBLE" +"DOUBLE_SHA1_HASH" +"DOUBLE_SHA1_PASSWORD" +"DROP COLUMN" +"DROP CONSTRAINT" +"DROP DEFAULT" +"DROP DETACHED PART" +"DROP DETACHED PARTITION" +"DROP INDEX" +"DROP PART" +"DROP PARTITION" +"DROP PROJECTION" +"DROP STATISTICS" +"DROP TABLE" +"DROP TEMPORARY TABLE" +"DROP" +"Date" +"Date32" +"DateTime" +"DateTime32" +"DateTime64" +"Decimal" +"Decimal128" +"Decimal256" +"Decimal32" +"Decimal64" +"Dynamic" +"ELSE" +"EMPTY AS" +"EMPTY" +"ENABLED ROLES" +"END" +"ENFORCED" +"ENGINE" +"ENUM" +"EPHEMERAL SEQUENTIAL" +"EPHEMERAL" +"ESTIMATE" +"EVENT" +"EVENTS" +"EVERY" +"EXCEPT DATABASE" +"EXCEPT DATABASES" +"EXCEPT TABLE" +"EXCEPT TABLES" +"EXCEPT" +"EXCHANGE DICTIONARIES" +"EXCHANGE TABLES" +"EXISTS" +"EXPLAIN" +"EXPRESSION" +"EXTENDED" +"EXTERNAL DDL FROM" +"EXTRACT" +"Enum" +"Enum16" +"Enum8" +"FALSE" +"FETCH PART" +"FETCH PARTITION" +"FETCH" +"FIELDS" +"FILE" +"FILESYSTEM CACHE" +"FILESYSTEM CACHES" +"FILTER" +"FINAL" +"FIRST" +"FIXED" +"FLOAT" +"FOLLOWING" +"FOR" +"FOREIGN KEY" +"FOREIGN" +"FORGET PARTITION" +"FORMAT" +"FORMAT_BYTES" +"FQDN" +"FREEZE" +"FROM INFILE" +"FROM SHARD" +"FROM" +"FROM_BASE64" +"FROM_DAYS" +"FROM_UNIXTIME" +"FULL" +"FULLTEXT" +"FUNCTION" +"FixedString" +"Float32" +"Float64" +"ForEach" +"GEOMETRY" +"GLOBAL IN" +"GLOBAL NOT IN" +"GLOBAL" +"GRANT OPTION FOR" +"GRANT" +"GRANTEES" +"GRANULARITY" +"GROUP BY" +"GROUPING SETS" +"GROUPS" +"H" +"HASH" +"HAVING" +"HDFS" +"HH" +"HIERARCHICAL" +"HOST" +"HOUR" +"HOURS" +"HTTP" +"ID" +"IDENTIFIED" +"IF EMPTY" +"IF EXISTS" +"IF NOT EXISTS" +"IGNORE NULLS" +"ILIKE" +"IN PARTITION" +"IN" +"INDEX" +"INDEXES" +"INDICES" +"INET4" +"INET6" +"INET6_ATON" +"INET6_NTOA" +"INET_ATON" +"INET_NTOA" +"INHERIT" +"INJECTIVE" +"INNER" +"INSERT INTO" +"INT SIGNED" +"INT UNSIGNED" +"INT" +"INT1 SIGNED" +"INT1 UNSIGNED" +"INT1" +"INTEGER SIGNED" +"INTEGER UNSIGNED" +"INTEGER" +"INTERPOLATE" +"INTERSECT" +"INTERVAL" +"INTO OUTFILE" +"INVISIBLE" +"INVOKER" +"IP" +"IPv4" +"IPv4CIDRToRange" +"IPv4NumToString" +"IPv4NumToStringClassC" +"IPv4StringToNum" +"IPv4StringToNumOrDefault" +"IPv4StringToNumOrNull" +"IPv4ToIPv6" +"IPv6" +"IPv6CIDRToRange" +"IPv6NumToString" +"IPv6StringToNum" +"IPv6StringToNumOrDefault" +"IPv6StringToNumOrNull" +"IS NOT DISTINCT FROM" +"IS NOT NULL" +"IS NULL" +"IS_OBJECT_ID" +"Int128" +"Int16" +"Int256" +"Int32" +"Int64" +"Int8" +"IntervalDay" +"IntervalHour" +"IntervalMicrosecond" +"IntervalMillisecond" +"IntervalMinute" +"IntervalMonth" +"IntervalNanosecond" +"IntervalQuarter" +"IntervalSecond" +"IntervalWeek" +"IntervalYear" +"JOIN" +"JSON" +"JSONArrayLength" +"JSONExtract" +"JSONExtractArrayRaw" +"JSONExtractBool" +"JSONExtractFloat" +"JSONExtractInt" +"JSONExtractKeys" +"JSONExtractKeysAndValues" +"JSONExtractKeysAndValuesRaw" +"JSONExtractRaw" +"JSONExtractString" +"JSONExtractUInt" +"JSONHas" +"JSONKey" +"JSONLength" +"JSONMergePatch" +"JSONType" +"JSON_ARRAY_LENGTH" +"JSON_EXISTS" +"JSON_QUERY" +"JSON_VALUE" +"JWT" +"KERBEROS" +"KEY BY" +"KEY" +"KEYED BY" +"KEYS" +"KILL" +"KIND" +"L1Distance" +"L1Norm" +"L1Normalize" +"L2Distance" +"L2Norm" +"L2Normalize" +"L2SquaredDistance" +"L2SquaredNorm" +"LARGE OBJECT" +"LAST" +"LAST_DAY" +"LAYOUT" +"LDAP" +"LEADING" +"LEFT ARRAY JOIN" +"LEFT" +"LESS THAN" +"LEVEL" +"LIFETIME" +"LIGHTWEIGHT" +"LIKE" +"LIMIT" +"LINEAR" +"LIST" +"LIVE" +"LOCAL" +"LONGBLOB" +"LONGTEXT" +"LTRIM" +"LineString" +"LinfDistance" +"LinfNorm" +"LinfNormalize" +"LowCardinality" +"LpDistance" +"LpNorm" +"LpNormalize" +"M" +"MACNumToString" +"MACStringToNum" +"MACStringToOUI" +"MAP_FROM_ARRAYS" +"MATCH" +"MATERIALIZE COLUMN" +"MATERIALIZE INDEX" +"MATERIALIZE PROJECTION" +"MATERIALIZE STATISTICS" +"MATERIALIZE TTL" +"MATERIALIZE" +"MATERIALIZED" +"MAX" +"MCS" +"MD4" +"MD5" +"MEDIUMBLOB" +"MEDIUMINT SIGNED" +"MEDIUMINT UNSIGNED" +"MEDIUMINT" +"MEDIUMTEXT" +"MEMORY" +"MERGES" +"METRICS INNER UUID" +"METRICS" +"MI" +"MICROSECOND" +"MICROSECONDS" +"MILLISECOND" +"MILLISECONDS" +"MIN" +"MINUTE" +"MINUTES" +"MM" +"MOD" +"MODIFY COLUMN" +"MODIFY COMMENT" +"MODIFY DEFINER" +"MODIFY ORDER BY" +"MODIFY QUERY" +"MODIFY REFRESH" +"MODIFY SAMPLE BY" +"MODIFY SETTING" +"MODIFY SQL SECURITY" +"MODIFY STATISTICS" +"MODIFY TTL" +"MODIFY" +"MONTH" +"MONTHS" +"MOVE PART" +"MOVE PARTITION" +"MOVE" +"MS" +"MUTATION" +"Map" +"Merge" +"MultiLineString" +"MultiPolygon" +"N" +"NAME" +"NAMED COLLECTION" +"NANOSECOND" +"NANOSECONDS" +"NATIONAL CHAR VARYING" +"NATIONAL CHAR" +"NATIONAL CHARACTER LARGE OBJECT" +"NATIONAL CHARACTER VARYING" +"NATIONAL CHARACTER" +"NCHAR LARGE OBJECT" +"NCHAR VARYING" +"NCHAR" +"NEXT" +"NO ACTION" +"NO DELAY" +"NO LIMITS" +"NONE" +"NOT BETWEEN" +"NOT IDENTIFIED" +"NOT ILIKE" +"NOT IN" +"NOT KEYED" +"NOT LIKE" +"NOT OVERRIDABLE" +"NOT" +"NO_PASSWORD" +"NS" +"NULL" +"NULLS" +"NUMERIC" +"NVARCHAR" +"Nested" +"Nothing" +"Null" +"Nullable" +"OCTET_LENGTH" +"OFFSET" +"ON DELETE" +"ON UPDATE" +"ON VOLUME" +"ON" +"ONLY" +"OPTIMIZE TABLE" +"OR REPLACE" +"OR" +"ORDER BY" +"OUTER" +"OVER" +"OVERRIDABLE" +"Object" +"PART" +"PARTIAL" +"PARTITION BY" +"PARTITION" +"PARTITIONS" +"PART_MOVE_TO_SHARD" +"PASTE" +"PERIODIC REFRESH" +"PERMANENTLY" +"PERMISSIVE" +"PERSISTENT SEQUENTIAL" +"PERSISTENT" +"PIPELINE" +"PLAINTEXT_PASSWORD" +"PLAN" +"POPULATE" +"PRECEDING" +"PRECISION" +"PREWHERE" +"PRIMARY KEY" +"PRIMARY" +"PROFILE" +"PROJECTION" +"PULL" +"Point" +"Polygon" +"Protobuf" +"Q" +"QQ" +"QUALIFY" +"QUARTER" +"QUARTERS" +"QUERY TREE" +"QUERY" +"QUOTA" +"RANDOMIZE FOR" +"RANDOMIZED" +"RANGE" +"READONLY" +"REAL" +"REALM" +"RECOMPRESS" +"RECURSIVE" +"REFERENCES" +"REFRESH" +"REGEXP" +"REGEXP_EXTRACT" +"REGEXP_MATCHES" +"REGEXP_REPLACE" +"REMOVE SAMPLE BY" +"REMOVE TTL" +"REMOVE" +"RENAME COLUMN" +"RENAME DATABASE" +"RENAME DICTIONARY" +"RENAME TABLE" +"RENAME TO" +"RENAME" +"REPLACE PARTITION" +"REPLACE" +"RESET SETTING" +"RESPECT NULLS" +"RESTORE" +"RESTRICT" +"RESTRICTIVE" +"RESUME" +"REVOKE" +"RIGHT" +"ROLLBACK" +"ROLLUP" +"ROW" +"ROWS" +"RTRIM" +"Resample" +"Ring" +"S" +"S3" +"SALT" +"SAMPLE BY" +"SAMPLE" +"SAN" +"SCHEMA" +"SCHEME" +"SECOND" +"SECONDS" +"SELECT" +"SEMI" +"SERVER" +"SET DEFAULT ROLE" +"SET DEFAULT" +"SET FAKE TIME" +"SET NULL" +"SET ROLE DEFAULT" +"SET ROLE" +"SET TRANSACTION SNAPSHOT" +"SET" +"SETTINGS" +"SHA1" +"SHA224" +"SHA256" +"SHA256_HASH" +"SHA256_PASSWORD" +"SHA384" +"SHA512" +"SHA512_256" +"SHOW ACCESS" +"SHOW CREATE" +"SHOW ENGINES" +"SHOW FUNCTIONS" +"SHOW GRANTS" +"SHOW PRIVILEGES" +"SHOW PROCESSLIST" +"SHOW SETTING" +"SHOW" +"SIGNED" +"SIMPLE" +"SINGLE" +"SMALLINT SIGNED" +"SMALLINT UNSIGNED" +"SMALLINT" +"SOURCE" +"SPATIAL" +"SQL SECURITY" +"SQL_TSI_DAY" +"SQL_TSI_HOUR" +"SQL_TSI_MICROSECOND" +"SQL_TSI_MILLISECOND" +"SQL_TSI_MINUTE" +"SQL_TSI_MONTH" +"SQL_TSI_NANOSECOND" +"SQL_TSI_QUARTER" +"SQL_TSI_SECOND" +"SQL_TSI_WEEK" +"SQL_TSI_YEAR" +"SS" +"SSH_KEY" +"SSL_CERTIFICATE" +"START TRANSACTION" +"STATISTICS" +"STD" +"STDArgMax" +"STDArgMin" +"STDArray" +"STDDEV_POP" +"STDDEV_POPArgMax" +"STDDEV_POPArgMin" +"STDDEV_POPArray" +"STDDEV_POPDistinct" +"STDDEV_POPForEach" +"STDDEV_POPIf" +"STDDEV_POPMap" +"STDDEV_POPMerge" +"STDDEV_POPNull" +"STDDEV_POPOrDefault" +"STDDEV_POPOrNull" +"STDDEV_POPResample" +"STDDEV_POPSimpleState" +"STDDEV_POPState" +"STDDEV_SAMP" +"STDDEV_SAMPArgMax" +"STDDEV_SAMPArgMin" +"STDDEV_SAMPArray" +"STDDEV_SAMPDistinct" +"STDDEV_SAMPForEach" +"STDDEV_SAMPIf" +"STDDEV_SAMPMap" +"STDDEV_SAMPMerge" +"STDDEV_SAMPNull" +"STDDEV_SAMPOrDefault" +"STDDEV_SAMPOrNull" +"STDDEV_SAMPResample" +"STDDEV_SAMPSimpleState" +"STDDEV_SAMPState" +"STDDistinct" +"STDForEach" +"STDIf" +"STDMap" +"STDMerge" +"STDNull" +"STDOrDefault" +"STDOrNull" +"STDResample" +"STDSimpleState" +"STDState" +"STEP" +"STORAGE" +"STRICT" +"STRICTLY_ASCENDING" +"SUBPARTITION BY" +"SUBPARTITION" +"SUBPARTITIONS" +"SUBSTRING" +"SUBSTRING_INDEX" +"SUSPEND" +"SVG" +"SYNC" +"SYNTAX" +"SYSTEM" +"SimpleAggregateFunction" +"State" +"String" +"TABLE OVERRIDE" +"TABLE" +"TABLES" +"TAGS INNER UUID" +"TAGS" +"TEMPORARY TABLE" +"TEMPORARY" +"TEST" +"TEXT" +"THEN" +"TIME" +"TIMESTAMP" +"TIMESTAMPADD" +"TIMESTAMPDIFF" +"TIMESTAMPSUB" +"TIMESTAMP_ADD" +"TIMESTAMP_DIFF" +"TIMESTAMP_SUB" +"TINYBLOB" +"TINYINT SIGNED" +"TINYINT UNSIGNED" +"TINYINT" +"TINYTEXT" +"TO DISK" +"TO INNER UUID" +"TO SHARD" +"TO TABLE" +"TO VOLUME" +"TO" +"TOP" +"TOTALS" +"TO_BASE64" +"TO_DAYS" +"TO_UNIXTIME" +"TRACKING ONLY" +"TRAILING" +"TRANSACTION" +"TRIGGER" +"TRIM" +"TRUE" +"TRUNCATE" +"TTL" +"TYPE" +"TYPEOF" +"Tuple" +"UInt128" +"UInt16" +"UInt256" +"UInt32" +"UInt64" +"UInt8" +"ULIDStringToDateTime" +"UNBOUNDED" +"UNDROP" +"UNFREEZE" +"UNION" +"UNIQUE" +"UNSET FAKE TIME" +"UNSIGNED" +"UPDATE" +"URL" +"URLHash" +"URLHierarchy" +"URLPathHierarchy" +"USE" +"USING" +"UTCTimestamp" +"UTC_timestamp" +"UUID" +"UUIDNumToString" +"UUIDStringToNum" +"UUIDToNum" +"UUIDv7ToDateTime" +"VALID UNTIL" +"VALUES" +"VARBINARY" +"VARCHAR" +"VARCHAR2" +"VARYING" +"VAR_POP" +"VAR_POPArgMax" +"VAR_POPArgMin" +"VAR_POPArray" +"VAR_POPDistinct" +"VAR_POPForEach" +"VAR_POPIf" +"VAR_POPMap" +"VAR_POPMerge" +"VAR_POPNull" +"VAR_POPOrDefault" +"VAR_POPOrNull" +"VAR_POPResample" +"VAR_POPSimpleState" +"VAR_POPState" +"VAR_SAMP" +"VAR_SAMPArgMax" +"VAR_SAMPArgMin" +"VAR_SAMPArray" +"VAR_SAMPDistinct" +"VAR_SAMPForEach" +"VAR_SAMPIf" +"VAR_SAMPMap" +"VAR_SAMPMerge" +"VAR_SAMPNull" +"VAR_SAMPOrDefault" +"VAR_SAMPOrNull" +"VAR_SAMPResample" +"VAR_SAMPSimpleState" +"VAR_SAMPState" +"VIEW" +"VISIBLE" +"Variant" +"WATCH" +"WATERMARK" +"WEEK" +"WEEKS" +"WHEN" +"WHERE" +"WINDOW" +"WITH ADMIN OPTION" +"WITH CHECK" +"WITH FILL" +"WITH GRANT OPTION" +"WITH NAME" +"WITH REPLACE OPTION" +"WITH TIES" +"WITH" +"WITH_ITEMINDEX" +"WK" +"WRITABLE" +"WW" +"YEAR" +"YEARS" +"YY" +"YYYY" +"YYYYMMDDToDate" +"YYYYMMDDToDate32" +"YYYYMMDDhhmmssToDateTime" +"YYYYMMDDhhmmssToDateTime64" +"ZKPATH" +"_CAST" +"__actionName" +"__bitBoolMaskAnd" +"__bitBoolMaskOr" +"__bitSwapLastTwo" +"__bitWrapperFunc" +"__getScalar" +"__scalarSubqueryResult" "abs" "accurateCast" "accurateCastOrDefault" "accurateCastOrNull" "acos" "acosh" -"ADD" -"ADD COLUMN" -"ADD CONSTRAINT" "addDate" "addDays" "addHours" -"ADD INDEX" "addInterval" "addMicroseconds" "addMilliseconds" "addMinutes" "addMonths" "addNanoseconds" -"ADD PROJECTION" "addQuarters" -"addressToLine" -"addressToLineWithInlines" -"addressToSymbol" "addSeconds" -"ADD STATISTIC" "addTupleOfIntervals" "addWeeks" "addYears" -"ADMIN OPTION FOR" +"addressToLine" +"addressToLineWithInlines" +"addressToSymbol" "aes_decrypt_mysql" "aes_encrypt_mysql" -"AFTER" "age" -"AggregateFunction" "aggThrow" "aggThrowArgMax" "aggThrowArgMin" @@ -48,24 +1001,7 @@ "aggThrowResample" "aggThrowSimpleState" "aggThrowState" -"ALGORITHM" -"ALIAS" -"ALL" -"ALLOWED_LATENESS" "alphaTokens" -"ALTER" -"ALTER COLUMN" -"ALTER DATABASE" -"ALTER LIVE VIEW" -"ALTER POLICY" -"ALTER PROFILE" -"ALTER QUOTA" -"ALTER ROLE" -"ALTER ROW POLICY" -"ALTER SETTINGS PROFILE" -"ALTER TABLE" -"ALTER TEMPORARY TABLE" -"ALTER USER" "analysisOfVariance" "analysisOfVarianceArgMax" "analysisOfVarianceArgMin" @@ -82,8 +1018,6 @@ "analysisOfVarianceSimpleState" "analysisOfVarianceState" "and" -"AND" -"AND STDOUT" "anova" "anovaArgMax" "anovaArgMin" @@ -99,9 +1033,7 @@ "anovaResample" "anovaSimpleState" "anovaState" -"ANTI" "any" -"ANY" "anyArgMax" "anyArgMin" "anyArray" @@ -136,6 +1068,8 @@ "anyLastOrDefault" "anyLastOrNull" "anyLastResample" +"anyLastSimpleState" +"anyLastState" "anyLast_respect_nulls" "anyLast_respect_nullsArgMax" "anyLast_respect_nullsArgMin" @@ -151,14 +1085,14 @@ "anyLast_respect_nullsResample" "anyLast_respect_nullsSimpleState" "anyLast_respect_nullsState" -"anyLastSimpleState" -"anyLastState" "anyMap" "anyMerge" "anyNull" "anyOrDefault" "anyOrNull" "anyResample" +"anySimpleState" +"anyState" "any_respect_nulls" "any_respect_nullsArgMax" "any_respect_nullsArgMin" @@ -174,8 +1108,6 @@ "any_respect_nullsResample" "any_respect_nullsSimpleState" "any_respect_nullsState" -"anySimpleState" -"anyState" "any_value" "any_valueArgMax" "any_valueArgMin" @@ -189,6 +1121,8 @@ "any_valueOrDefault" "any_valueOrNull" "any_valueResample" +"any_valueSimpleState" +"any_valueState" "any_value_respect_nulls" "any_value_respect_nullsArgMax" "any_value_respect_nullsArgMin" @@ -204,12 +1138,7 @@ "any_value_respect_nullsResample" "any_value_respect_nullsSimpleState" "any_value_respect_nullsState" -"any_valueSimpleState" -"any_valueState" -"APPEND" "appendTrailingCharIfAbsent" -"APPLY" -"APPLY DELETED MASK" "approx_top_count" "approx_top_countArgMax" "approx_top_countArgMin" @@ -286,42 +1215,11 @@ "argMinSimpleState" "argMinState" "array" -"Array" -"array_agg" -"array_aggArgMax" -"array_aggArgMin" -"array_aggArray" -"array_aggDistinct" -"array_aggForEach" -"array_aggIf" -"array_aggMap" -"array_aggMerge" -"array_aggNull" -"array_aggOrDefault" -"array_aggOrNull" -"array_aggResample" -"array_aggSimpleState" -"array_aggState" -"arrayAll" "arrayAUC" +"arrayAll" "arrayAvg" "arrayCompact" "arrayConcat" -"array_concat_agg" -"array_concat_aggArgMax" -"array_concat_aggArgMin" -"array_concat_aggArray" -"array_concat_aggDistinct" -"array_concat_aggForEach" -"array_concat_aggIf" -"array_concat_aggMap" -"array_concat_aggMerge" -"array_concat_aggNull" -"array_concat_aggOrDefault" -"array_concat_aggOrNull" -"array_concat_aggResample" -"array_concat_aggSimpleState" -"array_concat_aggState" "arrayCount" "arrayCumSum" "arrayCumSumNonNegative" @@ -345,7 +1243,6 @@ "arrayIntersect" "arrayJaccardIndex" "arrayJoin" -"ARRAY JOIN" "arrayLast" "arrayLastIndex" "arrayLastOrNull" @@ -382,31 +1279,43 @@ "arrayUniq" "arrayWithConstant" "arrayZip" -"AS" -"ASC" -"ASCENDING" +"array_agg" +"array_aggArgMax" +"array_aggArgMin" +"array_aggArray" +"array_aggDistinct" +"array_aggForEach" +"array_aggIf" +"array_aggMap" +"array_aggMerge" +"array_aggNull" +"array_aggOrDefault" +"array_aggOrNull" +"array_aggResample" +"array_aggSimpleState" +"array_aggState" +"array_concat_agg" +"array_concat_aggArgMax" +"array_concat_aggArgMin" +"array_concat_aggArray" +"array_concat_aggDistinct" +"array_concat_aggForEach" +"array_concat_aggIf" +"array_concat_aggMap" +"array_concat_aggMerge" +"array_concat_aggNull" +"array_concat_aggOrDefault" +"array_concat_aggOrNull" +"array_concat_aggResample" +"array_concat_aggSimpleState" +"array_concat_aggState" "ascii" "asin" "asinh" -"ASOF" -"ASSUME" "assumeNotNull" -"AST" -"ASYNC" "atan" "atan2" "atanh" -"ATTACH" -"ATTACH PART" -"ATTACH PARTITION" -"ATTACH POLICY" -"ATTACH PROFILE" -"ATTACH QUOTA" -"ATTACH ROLE" -"ATTACH ROW POLICY" -"ATTACH SETTINGS PROFILE" -"ATTACH USER" -"AUTO_INCREMENT" "avg" "avgArgMax" "avgArgMin" @@ -437,49 +1346,32 @@ "avgWeightedResample" "avgWeightedSimpleState" "avgWeightedState" -"AZURE" -"BACKUP" "bagexpansion" "bar" "base58Decode" "base58Encode" "base64Decode" "base64Encode" +"base64URLDecode" +"base64URLEncode" "base_backup" "basename" -"BCRYPT_HASH" -"BCRYPT_PASSWORD" -"BEGIN TRANSACTION" -"BETWEEN" -"BIDIRECTIONAL" -"BIGINT" -"BIGINT SIGNED" -"BIGINT UNSIGNED" "bin" -"BINARY" -"BINARY LARGE OBJECT" -"BINARY VARYING" -"BIT" "bitAnd" -"BIT_AND" -"BIT_ANDArgMax" -"BIT_ANDArgMin" -"BIT_ANDArray" -"BIT_ANDDistinct" -"BIT_ANDForEach" -"BIT_ANDIf" -"BIT_ANDMap" -"BIT_ANDMerge" -"BIT_ANDNull" -"BIT_ANDOrDefault" -"BIT_ANDOrNull" -"BIT_ANDResample" -"BIT_ANDSimpleState" -"BIT_ANDState" -"__bitBoolMaskAnd" -"__bitBoolMaskOr" "bitCount" "bitHammingDistance" +"bitNot" +"bitOr" +"bitPositionsToArray" +"bitRotateLeft" +"bitRotateRight" +"bitShiftLeft" +"bitShiftRight" +"bitSlice" +"bitTest" +"bitTestAll" +"bitTestAny" +"bitXor" "bitmapAnd" "bitmapAndCardinality" "bitmapAndnot" @@ -501,59 +1393,11 @@ "bitmapXorCardinality" "bitmaskToArray" "bitmaskToList" -"bitNot" -"bitOr" -"BIT_OR" -"BIT_ORArgMax" -"BIT_ORArgMin" -"BIT_ORArray" -"BIT_ORDistinct" -"BIT_ORForEach" -"BIT_ORIf" -"BIT_ORMap" -"BIT_ORMerge" -"BIT_ORNull" -"BIT_OROrDefault" -"BIT_OROrNull" -"BIT_ORResample" -"BIT_ORSimpleState" -"BIT_ORState" -"bitPositionsToArray" -"bitRotateLeft" -"bitRotateRight" -"bitShiftLeft" -"bitShiftRight" -"bitSlice" -"__bitSwapLastTwo" -"bitTest" -"bitTestAll" -"bitTestAny" -"__bitWrapperFunc" -"bitXor" -"BIT_XOR" -"BIT_XORArgMax" -"BIT_XORArgMin" -"BIT_XORArray" -"BIT_XORDistinct" -"BIT_XORForEach" -"BIT_XORIf" -"BIT_XORMap" -"BIT_XORMerge" -"BIT_XORNull" -"BIT_XOROrDefault" -"BIT_XOROrNull" -"BIT_XORResample" -"BIT_XORSimpleState" -"BIT_XORState" -"BLAKE3" -"BLOB" "blockNumber" "blockSerializedSize" "blockSize" "bool" -"Bool" "boolean" -"BOTH" "boundingRatio" "boundingRatioArgMax" "boundingRatioArgMin" @@ -570,21 +1414,14 @@ "boundingRatioSimpleState" "boundingRatioState" "buildId" -"BY" -"BYTE" -"BYTEA" "byteHammingDistance" "byteSize" "byteSlice" "byteSwap" -"CASCADE" -"CASE" "caseWithExpr" "caseWithExpression" "caseWithoutExpr" "caseWithoutExpression" -"_CAST" -"CAST" "catboostEvaluate" "categoricalInformationValue" "categoricalInformationValueArgMax" @@ -604,50 +1441,24 @@ "cbrt" "ceil" "ceiling" -"CHANGE" -"CHANGEABLE_IN_READONLY" -"CHANGED" +"changeDay" +"changeHour" +"changeMinute" +"changeMonth" +"changeSecond" +"changeYear" "char" -"CHAR" -"CHARACTER" -"CHARACTER LARGE OBJECT" -"CHARACTER_LENGTH" -"CHARACTER VARYING" -"CHAR LARGE OBJECT" -"CHAR_LENGTH" -"CHAR VARYING" -"CHECK" -"CHECK ALL TABLES" -"CHECK TABLE" "cityHash64" -"CLEANUP" -"CLEAR COLUMN" -"CLEAR INDEX" -"CLEAR PROJECTION" -"CLEAR STATISTIC" -"CLOB" -"CLUSTER" +"clamp" "cluster_host_ids" -"CLUSTERS" -"CN" "coalesce" -"CODEC" -"COLLATE" -"COLUMN" -"COLUMNS" -"COMMENT" -"COMMENT COLUMN" -"COMMIT" -"COMPRESSION" "concat" "concatAssumeInjective" "concatWithSeparator" "concatWithSeparatorAssumeInjective" "concat_ws" -"connection_id" "connectionId" -"CONST" -"CONSTRAINT" +"connection_id" "contingency" "contingencyArgMax" "contingencyArgMin" @@ -735,21 +1546,13 @@ "countSubstringsCaseInsensitive" "countSubstringsCaseInsensitiveUTF8" "covarPop" -"COVAR_POP" "covarPopArgMax" -"COVAR_POPArgMax" "covarPopArgMin" -"COVAR_POPArgMin" "covarPopArray" -"COVAR_POPArray" "covarPopDistinct" -"COVAR_POPDistinct" "covarPopForEach" -"COVAR_POPForEach" "covarPopIf" -"COVAR_POPIf" "covarPopMap" -"COVAR_POPMap" "covarPopMatrix" "covarPopMatrixArgMax" "covarPopMatrixArgMin" @@ -766,17 +1569,11 @@ "covarPopMatrixSimpleState" "covarPopMatrixState" "covarPopMerge" -"COVAR_POPMerge" "covarPopNull" -"COVAR_POPNull" "covarPopOrDefault" -"COVAR_POPOrDefault" "covarPopOrNull" -"COVAR_POPOrNull" "covarPopResample" -"COVAR_POPResample" "covarPopSimpleState" -"COVAR_POPSimpleState" "covarPopStable" "covarPopStableArgMax" "covarPopStableArgMin" @@ -793,23 +1590,14 @@ "covarPopStableSimpleState" "covarPopStableState" "covarPopState" -"COVAR_POPState" "covarSamp" -"COVAR_SAMP" "covarSampArgMax" -"COVAR_SAMPArgMax" "covarSampArgMin" -"COVAR_SAMPArgMin" "covarSampArray" -"COVAR_SAMPArray" "covarSampDistinct" -"COVAR_SAMPDistinct" "covarSampForEach" -"COVAR_SAMPForEach" "covarSampIf" -"COVAR_SAMPIf" "covarSampMap" -"COVAR_SAMPMap" "covarSampMatrix" "covarSampMatrixArgMax" "covarSampMatrixArgMin" @@ -826,17 +1614,11 @@ "covarSampMatrixSimpleState" "covarSampMatrixState" "covarSampMerge" -"COVAR_SAMPMerge" "covarSampNull" -"COVAR_SAMPNull" "covarSampOrDefault" -"COVAR_SAMPOrDefault" "covarSampOrNull" -"COVAR_SAMPOrNull" "covarSampResample" -"COVAR_SAMPResample" "covarSampSimpleState" -"COVAR_SAMPSimpleState" "covarSampStable" "covarSampStableArgMax" "covarSampStableArgMin" @@ -853,7 +1635,6 @@ "covarSampStableSimpleState" "covarSampStableState" "covarSampState" -"COVAR_SAMPState" "cramersV" "cramersVArgMax" "cramersVArgMin" @@ -884,38 +1665,17 @@ "cramersVResample" "cramersVSimpleState" "cramersVState" -"CRC32" -"CRC32IEEE" -"CRC64" -"CREATE" -"CREATE POLICY" -"CREATE PROFILE" -"CREATE QUOTA" -"CREATE ROLE" -"CREATE ROW POLICY" -"CREATE SETTINGS PROFILE" -"CREATE TABLE" -"CREATE TEMPORARY TABLE" -"CREATE USER" -"CROSS" -"CUBE" "curdate" -"current_database" "currentDatabase" -"current_date" -"CURRENT GRANTS" "currentProfiles" -"CURRENT QUOTA" "currentRoles" -"CURRENT ROLES" -"CURRENT ROW" -"current_schemas" "currentSchemas" -"current_timestamp" -"CURRENT TRANSACTION" "currentUser" -"CURRENT_USER" -"CURRENTUSER" +"current_database" +"current_date" +"current_schemas" +"current_timestamp" +"current_user" "cutFragment" "cutIPv6" "cutQueryString" @@ -930,59 +1690,25 @@ "cutToFirstSignificantSubdomainWithWWWRFC" "cutURLParameter" "cutWWW" -"D" "damerauLevenshteinDistance" -"DATABASE" -"DATABASES" -"Date" -"DATE" -"Date32" -"DATE_ADD" -"DATEADD" -"date_diff" "dateDiff" -"DATE_DIFF" -"DATEDIFF" -"DATE_FORMAT" "dateName" -"DATE_SUB" -"DATESUB" -"DateTime" -"DateTime32" -"DateTime64" "dateTime64ToSnowflake" +"dateTime64ToSnowflakeID" "dateTimeToSnowflake" +"dateTimeToSnowflakeID" "dateTrunc" -"DATE_TRUNC" -"DAY" -"DAYOFMONTH" -"DAYOFWEEK" -"DAYOFYEAR" -"DAYS" -"DD" -"DEC" -"Decimal" -"Decimal128" -"Decimal256" -"Decimal32" -"Decimal64" +"date_diff" "decodeHTMLComponent" "decodeURLComponent" "decodeURLFormComponent" "decodeXMLComponent" "decrypt" -"DEDUPLICATE" -"DEFAULT" -"DEFAULT DATABASE" "defaultProfiles" -"DEFAULT ROLE" "defaultRoles" "defaultValueOfArgumentType" "defaultValueOfTypeName" -"DEFINER" "degrees" -"DELETE" -"DELETE WHERE" "deltaSum" "deltaSumArgMax" "deltaSumArgMin" @@ -1014,6 +1740,21 @@ "deltaSumTimestampSimpleState" "deltaSumTimestampState" "demangle" +"denseRank" +"denseRankArgMax" +"denseRankArgMin" +"denseRankArray" +"denseRankDistinct" +"denseRankForEach" +"denseRankIf" +"denseRankMap" +"denseRankMerge" +"denseRankNull" +"denseRankOrDefault" +"denseRankOrNull" +"denseRankResample" +"denseRankSimpleState" +"denseRankState" "dense_rank" "dense_rankArgMax" "dense_rankArgMin" @@ -1029,13 +1770,6 @@ "dense_rankResample" "dense_rankSimpleState" "dense_rankState" -"DEPENDS ON" -"DESC" -"DESCENDING" -"DESCRIBE" -"DETACH" -"DETACH PART" -"DETACH PARTITION" "detectCharset" "detectLanguage" "detectLanguageMixed" @@ -1055,6 +1789,10 @@ "dictGetFloat64" "dictGetFloat64OrDefault" "dictGetHierarchy" +"dictGetIPv4" +"dictGetIPv4OrDefault" +"dictGetIPv6" +"dictGetIPv6OrDefault" "dictGetInt16" "dictGetInt16OrDefault" "dictGetInt32" @@ -1063,10 +1801,6 @@ "dictGetInt64OrDefault" "dictGetInt8" "dictGetInt8OrDefault" -"dictGetIPv4" -"dictGetIPv4OrDefault" -"dictGetIPv6" -"dictGetIPv6OrDefault" "dictGetOrDefault" "dictGetOrNull" "dictGetString" @@ -1082,19 +1816,13 @@ "dictGetUUID" "dictGetUUIDOrDefault" "dictHas" -"DICTIONARIES" -"DICTIONARY" "dictIsIn" -"DISK" "displayName" "distanceL1" "distanceL2" "distanceL2Squared" "distanceLinf" "distanceLp" -"DISTINCT" -"DISTINCT ON" -"DIV" "divide" "divideDecimal" "domain" @@ -1102,29 +1830,13 @@ "domainWithoutWWW" "domainWithoutWWWRFC" "dotProduct" -"DOUBLE" -"DOUBLE PRECISION" -"DOUBLE_SHA1_HASH" -"DOUBLE_SHA1_PASSWORD" -"DROP" -"DROP COLUMN" -"DROP CONSTRAINT" -"DROP DEFAULT" -"DROP DETACHED PART" -"DROP DETACHED PARTITION" -"DROP INDEX" -"DROP PART" -"DROP PARTITION" -"DROP PROJECTION" -"DROP STATISTIC" -"DROP TABLE" -"DROP TEMPORARY TABLE" "dumpColumnStructure" +"dynamicElement" +"dynamicType" "e" "editDistance" -"ELSE" +"editDistanceUTF8" "empty" -"EMPTY" "emptyArrayDate" "emptyArrayDateTime" "emptyArrayFloat32" @@ -1139,19 +1851,14 @@ "emptyArrayUInt32" "emptyArrayUInt64" "emptyArrayUInt8" -"EMPTY AS" "enabledProfiles" "enabledRoles" -"ENABLED ROLES" "encodeURLComponent" "encodeURLFormComponent" "encodeXMLComponent" "encrypt" -"END" "endsWith" "endsWithUTF8" -"ENFORCED" -"ENGINE" "entropy" "entropyArgMax" "entropyArgMin" @@ -1167,33 +1874,14 @@ "entropyResample" "entropySimpleState" "entropyState" -"Enum" -"ENUM" -"Enum16" -"Enum8" -"EPHEMERAL" -"EPHEMERAL SEQUENTIAL" "equals" "erf" "erfc" "errorCodeToName" -"ESTIMATE" "evalMLMethod" -"EVENT" -"EVENTS" -"EVERY" -"EXCEPT" -"EXCEPT DATABASE" -"EXCEPT DATABASES" -"EXCEPT TABLE" -"EXCEPT TABLES" -"EXCHANGE DICTIONARIES" -"EXCHANGE TABLES" -"EXISTS" "exp" "exp10" "exp2" -"EXPLAIN" "exponentialMovingAverage" "exponentialMovingAverageArgMax" "exponentialMovingAverageArgMin" @@ -1269,11 +1957,7 @@ "exponentialTimeDecayedSumResample" "exponentialTimeDecayedSumSimpleState" "exponentialTimeDecayedSumState" -"EXPRESSION" -"EXTENDED" -"EXTERNAL DDL FROM" "extract" -"EXTRACT" "extractAll" "extractAllGroups" "extractAllGroupsHorizontal" @@ -1286,24 +1970,13 @@ "extractURLParameterNames" "extractURLParameters" "factorial" -"FALSE" "farmFingerprint64" "farmHash64" -"FETCH" -"FETCH PART" -"FETCH PARTITION" -"FIELDS" "file" -"FILE" "filesystemAvailable" -"FILESYSTEM CACHE" -"FILESYSTEM CACHES" "filesystemCapacity" "filesystemUnreserved" -"FILTER" -"FINAL" "finalizeAggregation" -"FIRST" "firstLine" "firstSignificantSubdomain" "firstSignificantSubdomainCustom" @@ -1322,6 +1995,8 @@ "first_valueOrDefault" "first_valueOrNull" "first_valueResample" +"first_valueSimpleState" +"first_valueState" "first_value_respect_nulls" "first_value_respect_nullsArgMax" "first_value_respect_nullsArgMin" @@ -1337,10 +2012,6 @@ "first_value_respect_nullsResample" "first_value_respect_nullsSimpleState" "first_value_respect_nullsState" -"first_valueSimpleState" -"first_valueState" -"FIXED" -"FixedString" "flameGraph" "flameGraphArgMax" "flameGraphArgMin" @@ -1358,19 +2029,8 @@ "flameGraphState" "flatten" "flattenTuple" -"FLOAT" -"Float32" -"Float64" "floor" -"FOLLOWING" -"FOR" -"ForEach" -"FOREIGN" -"FOREIGN KEY" -"FORGET PARTITION" "format" -"FORMAT" -"FORMAT_BYTES" "formatDateTime" "formatDateTimeInJodaSyntax" "formatQuery" @@ -1383,67 +2043,50 @@ "formatReadableTimeDelta" "formatRow" "formatRowNoNewline" -"FQDN" "fragment" -"FREEZE" -"FROM" -"FROM_BASE64" -"FROM_DAYS" "fromDaysSinceYearZero" "fromDaysSinceYearZero32" -"FROM INFILE" "fromModifiedJulianDay" "fromModifiedJulianDayOrNull" -"FROM SHARD" -"FROM_UNIXTIME" +"fromUTCTimestamp" "fromUnixTimestamp" "fromUnixTimestamp64Micro" "fromUnixTimestamp64Milli" "fromUnixTimestamp64Nano" "fromUnixTimestampInJodaSyntax" "from_utc_timestamp" -"fromUTCTimestamp" -"FULL" "fullHostName" -"FULLTEXT" -"FUNCTION" "fuzzBits" "gccMurmurHash" "gcd" "generateRandomStructure" +"generateSnowflakeID" "generateULID" "generateUUIDv4" +"generateUUIDv7" "geoDistance" +"geoToH3" +"geoToS2" "geohashDecode" "geohashEncode" "geohashesInBox" -"GEOMETRY" -"geoToH3" -"geoToS2" +"getClientHTTPHeader" "getMacro" "getOSKernelVersion" -"__getScalar" "getServerPort" "getSetting" "getSizeOfEnumType" "getSubcolumn" "getTypeSerializationStreams" -"GLOBAL" "globalIn" -"GLOBAL IN" "globalInIgnoreSet" "globalNotIn" -"GLOBAL NOT IN" "globalNotInIgnoreSet" "globalNotNullIn" "globalNotNullInIgnoreSet" "globalNullIn" "globalNullInIgnoreSet" "globalVariable" -"GRANT" -"GRANTEES" -"GRANT OPTION FOR" -"GRANULARITY" "greatCircleAngle" "greatCircleDistance" "greater" @@ -1584,6 +2227,36 @@ "groupBitAndResample" "groupBitAndSimpleState" "groupBitAndState" +"groupBitOr" +"groupBitOrArgMax" +"groupBitOrArgMin" +"groupBitOrArray" +"groupBitOrDistinct" +"groupBitOrForEach" +"groupBitOrIf" +"groupBitOrMap" +"groupBitOrMerge" +"groupBitOrNull" +"groupBitOrOrDefault" +"groupBitOrOrNull" +"groupBitOrResample" +"groupBitOrSimpleState" +"groupBitOrState" +"groupBitXor" +"groupBitXorArgMax" +"groupBitXorArgMin" +"groupBitXorArray" +"groupBitXorDistinct" +"groupBitXorForEach" +"groupBitXorIf" +"groupBitXorMap" +"groupBitXorMerge" +"groupBitXorNull" +"groupBitXorOrDefault" +"groupBitXorOrNull" +"groupBitXorResample" +"groupBitXorSimpleState" +"groupBitXorState" "groupBitmap" "groupBitmapAnd" "groupBitmapAndArgMax" @@ -1643,39 +2316,21 @@ "groupBitmapXorResample" "groupBitmapXorSimpleState" "groupBitmapXorState" -"groupBitOr" -"groupBitOrArgMax" -"groupBitOrArgMin" -"groupBitOrArray" -"groupBitOrDistinct" -"groupBitOrForEach" -"groupBitOrIf" -"groupBitOrMap" -"groupBitOrMerge" -"groupBitOrNull" -"groupBitOrOrDefault" -"groupBitOrOrNull" -"groupBitOrResample" -"groupBitOrSimpleState" -"groupBitOrState" -"groupBitXor" -"groupBitXorArgMax" -"groupBitXorArgMin" -"groupBitXorArray" -"groupBitXorDistinct" -"groupBitXorForEach" -"groupBitXorIf" -"groupBitXorMap" -"groupBitXorMerge" -"groupBitXorNull" -"groupBitXorOrDefault" -"groupBitXorOrNull" -"groupBitXorResample" -"groupBitXorSimpleState" -"groupBitXorState" -"GROUP BY" -"GROUPING SETS" -"GROUPS" +"groupConcat" +"groupConcatArgMax" +"groupConcatArgMin" +"groupConcatArray" +"groupConcatDistinct" +"groupConcatForEach" +"groupConcatIf" +"groupConcatMap" +"groupConcatMerge" +"groupConcatNull" +"groupConcatOrDefault" +"groupConcatOrNull" +"groupConcatResample" +"groupConcatSimpleState" +"groupConcatState" "groupUniqArray" "groupUniqArrayArgMax" "groupUniqArrayArgMin" @@ -1691,7 +2346,21 @@ "groupUniqArrayResample" "groupUniqArraySimpleState" "groupUniqArrayState" -"H" +"group_concat" +"group_concatArgMax" +"group_concatArgMin" +"group_concatArray" +"group_concatDistinct" +"group_concatForEach" +"group_concatIf" +"group_concatMap" +"group_concatMerge" +"group_concatNull" +"group_concatOrDefault" +"group_concatOrNull" +"group_concatResample" +"group_concatSimpleState" +"group_concatState" "h3CellAreaM2" "h3CellAreaRads2" "h3Distance" @@ -1719,7 +2388,6 @@ "h3IsPentagon" "h3IsResClassIII" "h3IsValid" -"h3kRing" "h3Line" "h3NumHexagons" "h3PointDistKm" @@ -1732,12 +2400,12 @@ "h3ToParent" "h3ToString" "h3UnidirectionalEdgeIsValid" +"h3kRing" "halfMD5" "has" "hasAll" "hasAny" "hasColumnInTable" -"HASH" "hasSubsequence" "hasSubsequenceCaseInsensitive" "hasSubsequenceCaseInsensitiveUTF8" @@ -1748,11 +2416,9 @@ "hasTokenCaseInsensitive" "hasTokenCaseInsensitiveOrNull" "hasTokenOrNull" -"HAVING" -"HDFS" "hex" -"HH" -"HIERARCHICAL" +"hilbertDecode" +"hilbertEncode" "histogram" "histogramArgMax" "histogramArgMin" @@ -1772,73 +2438,33 @@ "hop" "hopEnd" "hopStart" -"HOST" -"hostname" "hostName" -"HOUR" -"HOURS" -"HTTP" +"hostname" "hypot" -"ID" -"IDENTIFIED" "identity" "idnaDecode" "idnaEncode" "if" -"IF EMPTY" -"IF EXISTS" -"IF NOT EXISTS" "ifNotFinite" "ifNull" "ignore" -"IGNORE NULLS" "ilike" -"ILIKE" "in" -"IN" -"INDEX" -"INDEXES" +"inIgnoreSet" "indexHint" "indexOf" -"INDICES" -"INET4" -"INET6" -"INET6_ATON" -"INET6_NTOA" -"INET_ATON" -"INET_NTOA" -"INHERIT" -"inIgnoreSet" "initcap" "initcapUTF8" -"initializeAggregation" -"initial_query_id" "initialQueryID" -"INJECTIVE" -"INNER" -"IN PARTITION" -"INSERT INTO" +"initial_query_id" +"initializeAggregation" "instr" -"INT" -"INT1" -"Int128" -"Int16" -"INT1 SIGNED" -"INT1 UNSIGNED" -"Int256" -"Int32" -"Int64" -"Int8" "intDiv" "intDivOrZero" -"INTEGER" -"INTEGER SIGNED" -"INTEGER UNSIGNED" -"INTERPOLATE" -"INTERSECT" -"INTERVAL" -"IntervalDay" -"IntervalHour" +"intExp10" +"intExp2" +"intHash32" +"intHash64" "intervalLengthSum" "intervalLengthSumArgMax" "intervalLengthSumArgMin" @@ -1854,55 +2480,18 @@ "intervalLengthSumResample" "intervalLengthSumSimpleState" "intervalLengthSumState" -"IntervalMicrosecond" -"IntervalMillisecond" -"IntervalMinute" -"IntervalMonth" -"IntervalNanosecond" -"IntervalQuarter" -"IntervalSecond" -"IntervalWeek" -"IntervalYear" -"intExp10" -"intExp2" -"intHash32" -"intHash64" -"INTO OUTFILE" -"INT SIGNED" -"INT UNSIGNED" -"INVISIBLE" -"INVOKER" -"IP" -"IPv4" -"IPv4CIDRToRange" -"IPv4NumToString" -"IPv4NumToStringClassC" -"IPv4StringToNum" -"IPv4StringToNumOrDefault" -"IPv4StringToNumOrNull" -"IPv4ToIPv6" -"IPv6" -"IPv6CIDRToRange" -"IPv6NumToString" -"IPv6StringToNum" -"IPv6StringToNumOrDefault" -"IPv6StringToNumOrNull" "isConstant" "isDecimalOverflow" "isFinite" -"isInfinite" "isIPAddressInRange" "isIPv4String" "isIPv6String" +"isInfinite" "isNaN" "isNotDistinctFrom" -"IS NOT DISTINCT FROM" "isNotNull" -"IS NOT NULL" "isNull" -"IS NULL" "isNullable" -"IS_OBJECT_ID" "isValidJSON" "isValidUTF8" "isZeroOrNull" @@ -1910,40 +2499,11 @@ "jaroWinklerSimilarity" "javaHash" "javaHashUTF16LE" -"JOIN" "joinGet" "joinGetOrNull" -"JSON" -"JSONArrayLength" -"JSON_ARRAY_LENGTH" -"JSON_EXISTS" -"JSONExtract" -"JSONExtractArrayRaw" -"JSONExtractBool" -"JSONExtractFloat" -"JSONExtractInt" -"JSONExtractKeys" -"JSONExtractKeysAndValues" -"JSONExtractKeysAndValuesRaw" -"JSONExtractRaw" -"JSONExtractString" -"JSONExtractUInt" -"JSONHas" -"JSONKey" -"JSONLength" "jsonMergePatch" -"JSON_QUERY" -"JSONType" -"JSON_VALUE" "jumpConsistentHash" "kafkaMurmurHash" -"KERBEROS" -"KEY" -"KEY BY" -"KEYED BY" -"KEYS" -"KILL" -"KIND" "kolmogorovSmirnovTest" "kolmogorovSmirnovTestArgMax" "kolmogorovSmirnovTestArgMin" @@ -1992,14 +2552,6 @@ "kurtSampResample" "kurtSampSimpleState" "kurtSampState" -"L1Distance" -"L1Norm" -"L1Normalize" -"L2Distance" -"L2Norm" -"L2Normalize" -"L2SquaredDistance" -"L2SquaredNorm" "lagInFrame" "lagInFrameArgMax" "lagInFrameArgMin" @@ -2015,7 +2567,6 @@ "lagInFrameResample" "lagInFrameSimpleState" "lagInFrameState" -"LARGE OBJECT" "largestTriangleThreeBuckets" "largestTriangleThreeBucketsArgMax" "largestTriangleThreeBucketsArgMin" @@ -2031,8 +2582,6 @@ "largestTriangleThreeBucketsResample" "largestTriangleThreeBucketsSimpleState" "largestTriangleThreeBucketsState" -"LAST" -"LAST_DAY" "last_value" "last_valueArgMax" "last_valueArgMin" @@ -2046,6 +2595,8 @@ "last_valueOrDefault" "last_valueOrNull" "last_valueResample" +"last_valueSimpleState" +"last_valueState" "last_value_respect_nulls" "last_value_respect_nullsArgMax" "last_value_respect_nullsArgMin" @@ -2061,12 +2612,8 @@ "last_value_respect_nullsResample" "last_value_respect_nullsSimpleState" "last_value_respect_nullsState" -"last_valueSimpleState" -"last_valueState" -"LAYOUT" "lcase" "lcm" -"LDAP" "leadInFrame" "leadInFrameArgMax" "leadInFrameArgMin" @@ -2082,11 +2629,8 @@ "leadInFrameResample" "leadInFrameSimpleState" "leadInFrameState" -"LEADING" "least" "left" -"LEFT" -"LEFT ARRAY JOIN" "leftPad" "leftPadUTF8" "leftUTF8" @@ -2095,42 +2639,23 @@ "lengthUTF8" "less" "lessOrEquals" -"LESS THAN" -"LEVEL" "levenshteinDistance" +"levenshteinDistanceUTF8" "lgamma" -"LIFETIME" -"LIGHTWEIGHT" "like" -"LIKE" -"LIMIT" -"LINEAR" -"LinfDistance" -"LinfNorm" -"LinfNormalize" -"LIST" -"LIVE" "ln" -"LOCAL" "locate" "log" "log10" "log1p" "log2" "logTrace" -"LONGBLOB" -"LONGTEXT" -"LowCardinality" "lowCardinalityIndices" "lowCardinalityKeys" "lower" "lowerUTF8" "lpad" -"LpDistance" -"LpNorm" -"LpNormalize" "ltrim" -"LTRIM" "lttb" "lttbArgMax" "lttbArgMin" @@ -2146,10 +2671,6 @@ "lttbResample" "lttbSimpleState" "lttbState" -"M" -"MACNumToString" -"MACStringToNum" -"MACStringToOUI" "makeDate" "makeDate32" "makeDateTime" @@ -2170,7 +2691,6 @@ "mannWhitneyUTestSimpleState" "mannWhitneyUTestState" "map" -"Map" "mapAdd" "mapAll" "mapApply" @@ -2181,7 +2701,6 @@ "mapExtractKeyLike" "mapFilter" "mapFromArrays" -"MAP_FROM_ARRAYS" "mapFromString" "mapKeys" "mapPartialReverseSort" @@ -2193,17 +2712,8 @@ "mapUpdate" "mapValues" "match" -"MATCH" "materialize" -"MATERIALIZE" -"MATERIALIZE COLUMN" -"MATERIALIZED" -"MATERIALIZE INDEX" -"MATERIALIZE PROJECTION" -"MATERIALIZE STATISTIC" -"MATERIALIZE TTL" "max" -"MAX" "max2" "maxArgMax" "maxArgMin" @@ -2264,9 +2774,6 @@ "maxResample" "maxSimpleState" "maxState" -"MCS" -"MD4" -"MD5" "meanZTest" "meanZTestArgMax" "meanZTestArgMin" @@ -2507,23 +3014,9 @@ "medianTimingWeightedResample" "medianTimingWeightedSimpleState" "medianTimingWeightedState" -"MEDIUMBLOB" -"MEDIUMINT" -"MEDIUMINT SIGNED" -"MEDIUMINT UNSIGNED" -"MEDIUMTEXT" -"MEMORY" -"Merge" -"MERGES" "metroHash64" -"MI" -"MICROSECOND" -"MICROSECONDS" "mid" -"MILLISECOND" -"MILLISECONDS" "min" -"MIN" "min2" "minArgMax" "minArgMin" @@ -2558,35 +3051,15 @@ "minSimpleState" "minState" "minus" -"MINUTE" -"MINUTES" "mismatches" -"MM" "mod" -"MOD" -"MODIFY" -"MODIFY COLUMN" -"MODIFY COMMENT" -"MODIFY ORDER BY" -"MODIFY QUERY" -"MODIFY REFRESH" -"MODIFY SAMPLE BY" -"MODIFY SETTING" -"MODIFY SQL SECURITY" -"MODIFY TTL" "modulo" "moduloLegacy" "moduloOrZero" -"MONTH" "monthName" -"MONTHS" "mortonDecode" "mortonEncode" -"MOVE" -"MOVE PART" -"MOVE PARTITION" "movingXXX" -"MS" "multiFuzzyMatchAllIndices" "multiFuzzyMatchAny" "multiFuzzyMatchAnyIndex" @@ -2594,9 +3067,6 @@ "multiMatchAllIndices" "multiMatchAny" "multiMatchAnyIndex" -"multiply" -"multiplyDecimal" -"MultiPolygon" "multiSearchAllPositions" "multiSearchAllPositionsCaseInsensitive" "multiSearchAllPositionsCaseInsensitiveUTF8" @@ -2613,31 +3083,17 @@ "multiSearchFirstPositionCaseInsensitive" "multiSearchFirstPositionCaseInsensitiveUTF8" "multiSearchFirstPositionUTF8" +"multiply" +"multiplyDecimal" "murmurHash2_32" "murmurHash2_64" "murmurHash3_128" "murmurHash3_32" "murmurHash3_64" -"MUTATION" -"N" -"NAME" -"NAMED COLLECTION" -"NANOSECOND" -"NANOSECONDS" -"NATIONAL CHAR" -"NATIONAL CHARACTER" -"NATIONAL CHARACTER LARGE OBJECT" -"NATIONAL CHARACTER VARYING" -"NATIONAL CHAR VARYING" -"NCHAR" -"NCHAR LARGE OBJECT" -"NCHAR VARYING" "negate" "neighbor" "nested" -"Nested" "netloc" -"NEXT" "ngramDistance" "ngramDistanceCaseInsensitive" "ngramDistanceCaseInsensitiveUTF8" @@ -2650,7 +3106,6 @@ "ngramMinHashCaseInsensitive" "ngramMinHashCaseInsensitiveUTF8" "ngramMinHashUTF8" -"ngrams" "ngramSearch" "ngramSearchCaseInsensitive" "ngramSearchCaseInsensitiveUTF8" @@ -2659,10 +3114,7 @@ "ngramSimHashCaseInsensitive" "ngramSimHashCaseInsensitiveUTF8" "ngramSimHashUTF8" -"NO ACTION" -"NO DELAY" -"NO LIMITS" -"NONE" +"ngrams" "nonNegativeDerivative" "nonNegativeDerivativeArgMax" "nonNegativeDerivativeArgMin" @@ -2678,9 +3130,11 @@ "nonNegativeDerivativeResample" "nonNegativeDerivativeSimpleState" "nonNegativeDerivativeState" -"NO_PASSWORD" -"normalizedQueryHash" -"normalizedQueryHashKeepNames" +"normL1" +"normL2" +"normL2Squared" +"normLinf" +"normLp" "normalizeL1" "normalizeL2" "normalizeLinf" @@ -2691,18 +3145,18 @@ "normalizeUTF8NFD" "normalizeUTF8NFKC" "normalizeUTF8NFKD" -"normL1" -"normL2" -"normL2Squared" -"normLinf" -"normLp" +"normalizedQueryHash" +"normalizedQueryHashKeepNames" "not" -"NOT" -"NOT BETWEEN" "notEmpty" "notEquals" +"notILike" +"notIn" +"notInIgnoreSet" +"notLike" +"notNullIn" +"notNullInIgnoreSet" "nothing" -"Nothing" "nothingArgMax" "nothingArgMin" "nothingArray" @@ -2746,22 +3200,9 @@ "nothingUInt64Resample" "nothingUInt64SimpleState" "nothingUInt64State" -"NOT IDENTIFIED" -"notILike" -"NOT ILIKE" -"notIn" -"NOT IN" -"notInIgnoreSet" -"NOT KEYED" -"notLike" -"NOT LIKE" -"notNullIn" -"notNullInIgnoreSet" -"NOT OVERRIDABLE" "now" "now64" "nowInBlock" -"NS" "nth_value" "nth_valueArgMax" "nth_valueArgMin" @@ -2792,31 +3233,10 @@ "ntileResample" "ntileSimpleState" "ntileState" -"Null" -"NULL" -"Nullable" "nullIf" "nullIn" "nullInIgnoreSet" -"NULLS" -"NUMERIC" -"NVARCHAR" -"Object" -"OCTET_LENGTH" -"OFFSET" -"ON" -"ON DELETE" -"ONLY" -"ON UPDATE" -"ON VOLUME" -"OPTIMIZE TABLE" "or" -"OR" -"ORDER BY" -"OR REPLACE" -"OUTER" -"OVER" -"OVERRIDABLE" "parseDateTime" "parseDateTime32BestEffort" "parseDateTime32BestEffortOrNull" @@ -2838,32 +3258,49 @@ "parseDateTimeInJodaSyntaxOrZero" "parseDateTimeOrNull" "parseDateTimeOrZero" +"parseReadableSize" +"parseReadableSizeOrNull" +"parseReadableSizeOrZero" "parseTimeDelta" -"PART" -"PARTIAL" -"PARTITION" -"PARTITION BY" +"partitionID" "partitionId" -"PARTITIONS" -"PART_MOVE_TO_SHARD" -"PASTE" "path" "pathFull" -"PERIODIC REFRESH" -"PERMANENTLY" -"PERMISSIVE" -"PERSISTENT" -"PERSISTENT SEQUENTIAL" +"percentRank" +"percentRankArgMax" +"percentRankArgMin" +"percentRankArray" +"percentRankDistinct" +"percentRankForEach" +"percentRankIf" +"percentRankMap" +"percentRankMerge" +"percentRankNull" +"percentRankOrDefault" +"percentRankOrNull" +"percentRankResample" +"percentRankSimpleState" +"percentRankState" +"percent_rank" +"percent_rankArgMax" +"percent_rankArgMin" +"percent_rankArray" +"percent_rankDistinct" +"percent_rankForEach" +"percent_rankIf" +"percent_rankMap" +"percent_rankMerge" +"percent_rankNull" +"percent_rankOrDefault" +"percent_rankOrNull" +"percent_rankResample" +"percent_rankSimpleState" +"percent_rankState" "pi" -"PIPELINE" -"PLAINTEXT_PASSWORD" -"PLAN" "plus" "pmod" -"Point" "pointInEllipses" "pointInPolygon" -"Polygon" "polygonAreaCartesian" "polygonAreaSpherical" "polygonConvexHullCartesian" @@ -2880,32 +3317,21 @@ "polygonsUnionSpherical" "polygonsWithinCartesian" "polygonsWithinSpherical" -"POPULATE" "port" "portRFC" "position" "positionCaseInsensitive" "positionCaseInsensitiveUTF8" "positionUTF8" -"positive_modulo" "positiveModulo" +"positive_modulo" "pow" "power" -"PRECEDING" -"PRECISION" -"PREWHERE" -"PRIMARY" -"PRIMARY KEY" -"PROFILE" -"PROJECTION" +"printf" "proportionsZTest" -"Protobuf" "protocol" -"PULL" "punycodeDecode" "punycodeEncode" -"Q" -"QQ" "quantile" "quantileArgMax" "quantileArgMin" @@ -3099,6 +3525,68 @@ "quantileOrDefault" "quantileOrNull" "quantileResample" +"quantileSimpleState" +"quantileState" +"quantileTDigest" +"quantileTDigestArgMax" +"quantileTDigestArgMin" +"quantileTDigestArray" +"quantileTDigestDistinct" +"quantileTDigestForEach" +"quantileTDigestIf" +"quantileTDigestMap" +"quantileTDigestMerge" +"quantileTDigestNull" +"quantileTDigestOrDefault" +"quantileTDigestOrNull" +"quantileTDigestResample" +"quantileTDigestSimpleState" +"quantileTDigestState" +"quantileTDigestWeighted" +"quantileTDigestWeightedArgMax" +"quantileTDigestWeightedArgMin" +"quantileTDigestWeightedArray" +"quantileTDigestWeightedDistinct" +"quantileTDigestWeightedForEach" +"quantileTDigestWeightedIf" +"quantileTDigestWeightedMap" +"quantileTDigestWeightedMerge" +"quantileTDigestWeightedNull" +"quantileTDigestWeightedOrDefault" +"quantileTDigestWeightedOrNull" +"quantileTDigestWeightedResample" +"quantileTDigestWeightedSimpleState" +"quantileTDigestWeightedState" +"quantileTiming" +"quantileTimingArgMax" +"quantileTimingArgMin" +"quantileTimingArray" +"quantileTimingDistinct" +"quantileTimingForEach" +"quantileTimingIf" +"quantileTimingMap" +"quantileTimingMerge" +"quantileTimingNull" +"quantileTimingOrDefault" +"quantileTimingOrNull" +"quantileTimingResample" +"quantileTimingSimpleState" +"quantileTimingState" +"quantileTimingWeighted" +"quantileTimingWeightedArgMax" +"quantileTimingWeightedArgMin" +"quantileTimingWeightedArray" +"quantileTimingWeightedDistinct" +"quantileTimingWeightedForEach" +"quantileTimingWeightedIf" +"quantileTimingWeightedMap" +"quantileTimingWeightedMerge" +"quantileTimingWeightedNull" +"quantileTimingWeightedOrDefault" +"quantileTimingWeightedOrNull" +"quantileTimingWeightedResample" +"quantileTimingWeightedSimpleState" +"quantileTimingWeightedState" "quantiles" "quantilesArgMax" "quantilesArgMin" @@ -3271,7 +3759,6 @@ "quantilesGKSimpleState" "quantilesGKState" "quantilesIf" -"quantileSimpleState" "quantilesInterpolatedWeighted" "quantilesInterpolatedWeightedArgMax" "quantilesInterpolatedWeightedArgMin" @@ -3295,7 +3782,6 @@ "quantilesResample" "quantilesSimpleState" "quantilesState" -"quantileState" "quantilesTDigest" "quantilesTDigestArgMax" "quantilesTDigestArgMin" @@ -3356,75 +3842,10 @@ "quantilesTimingWeightedResample" "quantilesTimingWeightedSimpleState" "quantilesTimingWeightedState" -"quantileTDigest" -"quantileTDigestArgMax" -"quantileTDigestArgMin" -"quantileTDigestArray" -"quantileTDigestDistinct" -"quantileTDigestForEach" -"quantileTDigestIf" -"quantileTDigestMap" -"quantileTDigestMerge" -"quantileTDigestNull" -"quantileTDigestOrDefault" -"quantileTDigestOrNull" -"quantileTDigestResample" -"quantileTDigestSimpleState" -"quantileTDigestState" -"quantileTDigestWeighted" -"quantileTDigestWeightedArgMax" -"quantileTDigestWeightedArgMin" -"quantileTDigestWeightedArray" -"quantileTDigestWeightedDistinct" -"quantileTDigestWeightedForEach" -"quantileTDigestWeightedIf" -"quantileTDigestWeightedMap" -"quantileTDigestWeightedMerge" -"quantileTDigestWeightedNull" -"quantileTDigestWeightedOrDefault" -"quantileTDigestWeightedOrNull" -"quantileTDigestWeightedResample" -"quantileTDigestWeightedSimpleState" -"quantileTDigestWeightedState" -"quantileTiming" -"quantileTimingArgMax" -"quantileTimingArgMin" -"quantileTimingArray" -"quantileTimingDistinct" -"quantileTimingForEach" -"quantileTimingIf" -"quantileTimingMap" -"quantileTimingMerge" -"quantileTimingNull" -"quantileTimingOrDefault" -"quantileTimingOrNull" -"quantileTimingResample" -"quantileTimingSimpleState" -"quantileTimingState" -"quantileTimingWeighted" -"quantileTimingWeightedArgMax" -"quantileTimingWeightedArgMin" -"quantileTimingWeightedArray" -"quantileTimingWeightedDistinct" -"quantileTimingWeightedForEach" -"quantileTimingWeightedIf" -"quantileTimingWeightedMap" -"quantileTimingWeightedMerge" -"quantileTimingWeightedNull" -"quantileTimingWeightedOrDefault" -"quantileTimingWeightedOrNull" -"quantileTimingWeightedResample" -"quantileTimingWeightedSimpleState" -"quantileTimingWeightedState" -"QUARTER" -"QUARTERS" -"QUERY" -"query_id" "queryID" "queryString" "queryStringAndFragment" -"QUERY TREE" -"QUOTA" +"query_id" "radians" "rand" "rand32" @@ -3439,17 +3860,14 @@ "randLogNormal" "randNegativeBinomial" "randNormal" -"randomFixedString" -"RANDOMIZED" -"RANDOMIZE FOR" -"randomPrintableASCII" -"randomString" -"randomStringUTF8" "randPoisson" "randStudentT" "randUniform" +"randomFixedString" +"randomPrintableASCII" +"randomString" +"randomStringUTF8" "range" -"RANGE" "rank" "rankArgMax" "rankArgMin" @@ -3480,22 +3898,14 @@ "rankResample" "rankSimpleState" "rankState" -"READONLY" +"readWKTLineString" +"readWKTMultiLineString" "readWKTMultiPolygon" "readWKTPoint" "readWKTPolygon" "readWKTRing" -"REAL" -"REALM" -"RECOMPRESS" -"REFERENCES" -"REFRESH" -"REGEXP" "regexpExtract" -"REGEXP_EXTRACT" -"REGEXP_MATCHES" "regexpQuoteMeta" -"REGEXP_REPLACE" "regionHierarchy" "regionIn" "regionToArea" @@ -3526,31 +3936,13 @@ "reinterpretAsUInt64" "reinterpretAsUInt8" "reinterpretAsUUID" -"REMOVE" -"REMOVE SAMPLE BY" -"REMOVE TTL" -"RENAME" -"RENAME COLUMN" -"RENAME DATABASE" -"RENAME DICTIONARY" -"RENAME TABLE" -"RENAME TO" "repeat" "replace" -"REPLACE" "replaceAll" "replaceOne" -"REPLACE PARTITION" "replaceRegexpAll" "replaceRegexpOne" "replicate" -"Resample" -"RESET SETTING" -"RESPECT NULLS" -"RESTORE" -"RESTRICT" -"RESTRICTIVE" -"RESUME" "retention" "retentionArgMax" "retentionArgMin" @@ -3569,22 +3961,18 @@ "reverse" "reverseUTF8" "revision" -"REVOKE" "right" -"RIGHT" "rightPad" "rightPadUTF8" "rightUTF8" -"Ring" -"ROLLBACK" -"ROLLUP" "round" "roundAge" "roundBankers" "roundDown" "roundDuration" "roundToExp2" -"ROW" +"rowNumberInAllBlocks" +"rowNumberInBlock" "row_number" "row_numberArgMax" "row_numberArgMin" @@ -3592,8 +3980,6 @@ "row_numberDistinct" "row_numberForEach" "row_numberIf" -"rowNumberInAllBlocks" -"rowNumberInBlock" "row_numberMap" "row_numberMerge" "row_numberNull" @@ -3602,15 +3988,12 @@ "row_numberResample" "row_numberSimpleState" "row_numberState" -"ROWS" "rpad" "rtrim" -"RTRIM" "runningAccumulate" "runningConcurrency" "runningDifference" "runningDifferenceStartingWithFirstValue" -"S" "s2CapContains" "s2CapUnion" "s2CellsIntersect" @@ -3620,18 +4003,7 @@ "s2RectIntersection" "s2RectUnion" "s2ToGeo" -"S3" -"SALT" -"SAMPLE" -"SAMPLE BY" "scalarProduct" -"__scalarSubqueryResult" -"SCHEMA" -"SCHEME" -"SECOND" -"SECONDS" -"SELECT" -"SEMI" "sequenceCount" "sequenceCountArgMax" "sequenceCountArgMin" @@ -3680,44 +4052,14 @@ "seriesDecomposeSTL" "seriesOutliersDetectTukey" "seriesPeriodDetectFFT" -"SERVER" -"serverTimezone" "serverTimeZone" +"serverTimezone" "serverUUID" -"SET" -"SET DEFAULT" -"SET DEFAULT ROLE" -"SET FAKE TIME" -"SET NULL" -"SET ROLE" -"SET ROLE DEFAULT" -"SETTINGS" -"SET TRANSACTION SNAPSHOT" -"SHA1" -"SHA224" -"SHA256" -"SHA256_HASH" -"SHA256_PASSWORD" -"SHA384" -"SHA512" -"SHA512_256" "shardCount" "shardNum" -"SHOW" -"SHOW ACCESS" "showCertificate" -"SHOW CREATE" -"SHOW ENGINES" -"SHOW FUNCTIONS" -"SHOW GRANTS" -"SHOW PRIVILEGES" -"SHOW PROCESSLIST" -"SHOW SETTING" "sigmoid" "sign" -"SIGNED" -"SIMPLE" -"SimpleAggregateFunction" "simpleJSONExtractBool" "simpleJSONExtractFloat" "simpleJSONExtractInt" @@ -3741,7 +4083,6 @@ "simpleLinearRegressionSimpleState" "simpleLinearRegressionState" "sin" -"SINGLE" "singleValueOrNull" "singleValueOrNullArgMax" "singleValueOrNullArgMin" @@ -3796,45 +4137,42 @@ "skewSampState" "sleep" "sleepEachRow" -"SMALLINT" -"SMALLINT SIGNED" -"SMALLINT UNSIGNED" +"snowflakeIDToDateTime" +"snowflakeIDToDateTime64" "snowflakeToDateTime" "snowflakeToDateTime64" "soundex" -"SOURCE" "space" -"sparkbar" "sparkBar" -"sparkbarArgMax" "sparkBarArgMax" -"sparkbarArgMin" "sparkBarArgMin" -"sparkbarArray" "sparkBarArray" -"sparkbarDistinct" "sparkBarDistinct" -"sparkbarForEach" "sparkBarForEach" -"sparkbarIf" "sparkBarIf" -"sparkbarMap" "sparkBarMap" -"sparkbarMerge" "sparkBarMerge" -"sparkbarNull" "sparkBarNull" -"sparkbarOrDefault" "sparkBarOrDefault" -"sparkbarOrNull" "sparkBarOrNull" -"sparkbarResample" "sparkBarResample" -"sparkbarSimpleState" "sparkBarSimpleState" -"sparkbarState" "sparkBarState" -"SPATIAL" +"sparkbar" +"sparkbarArgMax" +"sparkbarArgMin" +"sparkbarArray" +"sparkbarDistinct" +"sparkbarForEach" +"sparkbarIf" +"sparkbarMap" +"sparkbarMerge" +"sparkbarNull" +"sparkbarOrDefault" +"sparkbarOrNull" +"sparkbarResample" +"sparkbarSimpleState" +"sparkbarState" "splitByAlpha" "splitByChar" "splitByNonAlpha" @@ -3844,58 +4182,23 @@ "sqid" "sqidDecode" "sqidEncode" -"SQL SECURITY" -"SQL_TSI_DAY" -"SQL_TSI_HOUR" -"SQL_TSI_MICROSECOND" -"SQL_TSI_MILLISECOND" -"SQL_TSI_MINUTE" -"SQL_TSI_MONTH" -"SQL_TSI_NANOSECOND" -"SQL_TSI_QUARTER" -"SQL_TSI_SECOND" -"SQL_TSI_WEEK" -"SQL_TSI_YEAR" "sqrt" -"SS" -"SSH_KEY" -"SSL_CERTIFICATE" "startsWith" "startsWithUTF8" -"State" -"STATISTIC" -"STD" -"STDArgMax" -"STDArgMin" -"STDArray" "stddevPop" -"STDDEV_POP" "stddevPopArgMax" -"STDDEV_POPArgMax" "stddevPopArgMin" -"STDDEV_POPArgMin" "stddevPopArray" -"STDDEV_POPArray" "stddevPopDistinct" -"STDDEV_POPDistinct" "stddevPopForEach" -"STDDEV_POPForEach" "stddevPopIf" -"STDDEV_POPIf" "stddevPopMap" -"STDDEV_POPMap" "stddevPopMerge" -"STDDEV_POPMerge" "stddevPopNull" -"STDDEV_POPNull" "stddevPopOrDefault" -"STDDEV_POPOrDefault" "stddevPopOrNull" -"STDDEV_POPOrNull" "stddevPopResample" -"STDDEV_POPResample" "stddevPopSimpleState" -"STDDEV_POPSimpleState" "stddevPopStable" "stddevPopStableArgMax" "stddevPopStableArgMin" @@ -3912,35 +4215,20 @@ "stddevPopStableSimpleState" "stddevPopStableState" "stddevPopState" -"STDDEV_POPState" "stddevSamp" -"STDDEV_SAMP" "stddevSampArgMax" -"STDDEV_SAMPArgMax" "stddevSampArgMin" -"STDDEV_SAMPArgMin" "stddevSampArray" -"STDDEV_SAMPArray" "stddevSampDistinct" -"STDDEV_SAMPDistinct" "stddevSampForEach" -"STDDEV_SAMPForEach" "stddevSampIf" -"STDDEV_SAMPIf" "stddevSampMap" -"STDDEV_SAMPMap" "stddevSampMerge" -"STDDEV_SAMPMerge" "stddevSampNull" -"STDDEV_SAMPNull" "stddevSampOrDefault" -"STDDEV_SAMPOrDefault" "stddevSampOrNull" -"STDDEV_SAMPOrNull" "stddevSampResample" -"STDDEV_SAMPResample" "stddevSampSimpleState" -"STDDEV_SAMPSimpleState" "stddevSampStable" "stddevSampStableArgMax" "stddevSampStableArgMin" @@ -3957,20 +4245,7 @@ "stddevSampStableSimpleState" "stddevSampStableState" "stddevSampState" -"STDDEV_SAMPState" -"STDDistinct" -"STDForEach" -"STDIf" -"STDMap" -"STDMerge" -"STDNull" -"STDOrDefault" -"STDOrNull" -"STDResample" -"STDSimpleState" -"STDState" "stem" -"STEP" "stochasticLinearRegression" "stochasticLinearRegressionArgMax" "stochasticLinearRegressionArgMin" @@ -4001,15 +4276,11 @@ "stochasticLogisticRegressionResample" "stochasticLogisticRegressionSimpleState" "stochasticLogisticRegressionState" -"STORAGE" -"STRICT" -"STRICTLY_ASCENDING" -"String" +"str_to_date" +"str_to_map" "stringJaccardIndex" "stringJaccardIndexUTF8" "stringToH3" -"str_to_date" -"str_to_map" "structureToCapnProtoSchema" "structureToProtobufSchema" "studentTTest" @@ -4029,14 +4300,9 @@ "studentTTestState" "subBitmap" "subDate" -"SUBPARTITION" -"SUBPARTITION BY" -"SUBPARTITIONS" "substr" "substring" -"SUBSTRING" "substringIndex" -"SUBSTRING_INDEX" "substringIndexUTF8" "substringUTF8" "subtractDays" @@ -4120,21 +4386,6 @@ "sumMapFilteredWithOverflowResample" "sumMapFilteredWithOverflowSimpleState" "sumMapFilteredWithOverflowState" -"sumMappedArrays" -"sumMappedArraysArgMax" -"sumMappedArraysArgMin" -"sumMappedArraysArray" -"sumMappedArraysDistinct" -"sumMappedArraysForEach" -"sumMappedArraysIf" -"sumMappedArraysMap" -"sumMappedArraysMerge" -"sumMappedArraysNull" -"sumMappedArraysOrDefault" -"sumMappedArraysOrNull" -"sumMappedArraysResample" -"sumMappedArraysSimpleState" -"sumMappedArraysState" "sumMapWithOverflow" "sumMapWithOverflowArgMax" "sumMapWithOverflowArgMin" @@ -4150,6 +4401,21 @@ "sumMapWithOverflowResample" "sumMapWithOverflowSimpleState" "sumMapWithOverflowState" +"sumMappedArrays" +"sumMappedArraysArgMax" +"sumMappedArraysArgMin" +"sumMappedArraysArray" +"sumMappedArraysDistinct" +"sumMappedArraysForEach" +"sumMappedArraysIf" +"sumMappedArraysMap" +"sumMappedArraysMerge" +"sumMappedArraysNull" +"sumMappedArraysOrDefault" +"sumMappedArraysOrNull" +"sumMappedArraysResample" +"sumMappedArraysSimpleState" +"sumMappedArraysState" "sumMerge" "sumNull" "sumOrDefault" @@ -4172,23 +4438,11 @@ "sumWithOverflowResample" "sumWithOverflowSimpleState" "sumWithOverflowState" -"SUSPEND" "svg" -"SVG" -"SYNC" "synonyms" -"SYNTAX" -"SYSTEM" -"TABLE" -"TABLE OVERRIDE" -"TABLES" "tan" "tanh" "tcpPort" -"TEMPORARY" -"TEMPORARY TABLE" -"TEST" -"TEXT" "tgamma" "theilsU" "theilsUArgMax" @@ -4205,36 +4459,20 @@ "theilsUResample" "theilsUSimpleState" "theilsUState" -"THEN" "throwIf" "tid" -"TIME" "timeDiff" "timeSlot" "timeSlots" -"timestamp" -"TIMESTAMP" -"TIMESTAMP_ADD" -"TIMESTAMPADD" -"timestamp_diff" -"timestampDiff" -"TIMESTAMP_DIFF" -"TIMESTAMPDIFF" -"TIMESTAMP_SUB" -"TIMESTAMPSUB" -"timezone" "timeZone" -"timezoneOf" "timeZoneOf" -"timezoneOffset" "timeZoneOffset" -"TINYBLOB" -"TINYINT" -"TINYINT SIGNED" -"TINYINT UNSIGNED" -"TINYTEXT" -"TO" -"TO_BASE64" +"timestamp" +"timestampDiff" +"timestamp_diff" +"timezone" +"timezoneOf" +"timezoneOffset" "toBool" "toColumnTypeName" "toDate" @@ -4254,11 +4492,9 @@ "toDateTimeOrDefault" "toDateTimeOrNull" "toDateTimeOrZero" -"today" "toDayOfMonth" "toDayOfWeek" "toDayOfYear" -"TO_DAYS" "toDaysSinceYearZero" "toDecimal128" "toDecimal128OrDefault" @@ -4277,7 +4513,6 @@ "toDecimal64OrNull" "toDecimal64OrZero" "toDecimalString" -"TO DISK" "toFixedString" "toFloat32" "toFloat32OrDefault" @@ -4288,7 +4523,16 @@ "toFloat64OrNull" "toFloat64OrZero" "toHour" -"TO INNER UUID" +"toIPv4" +"toIPv4OrDefault" +"toIPv4OrNull" +"toIPv4OrZero" +"toIPv6" +"toIPv6OrDefault" +"toIPv6OrNull" +"toIPv6OrZero" +"toISOWeek" +"toISOYear" "toInt128" "toInt128OrDefault" "toInt128OrNull" @@ -4324,18 +4568,7 @@ "toIntervalSecond" "toIntervalWeek" "toIntervalYear" -"toIPv4" -"toIPv4OrDefault" -"toIPv4OrNull" -"toIPv4OrZero" -"toIPv6" -"toIPv6OrDefault" -"toIPv6OrNull" -"toIPv6OrZero" -"toISOWeek" -"toISOYear" "toJSONString" -"tokens" "toLastDayOfMonth" "toLastDayOfWeek" "toLowCardinality" @@ -4346,7 +4579,82 @@ "toMonday" "toMonth" "toNullable" -"TOP" +"toQuarter" +"toRelativeDayNum" +"toRelativeHourNum" +"toRelativeMinuteNum" +"toRelativeMonthNum" +"toRelativeQuarterNum" +"toRelativeSecondNum" +"toRelativeWeekNum" +"toRelativeYearNum" +"toSecond" +"toStartOfDay" +"toStartOfFifteenMinutes" +"toStartOfFiveMinute" +"toStartOfFiveMinutes" +"toStartOfHour" +"toStartOfISOYear" +"toStartOfInterval" +"toStartOfMicrosecond" +"toStartOfMillisecond" +"toStartOfMinute" +"toStartOfMonth" +"toStartOfNanosecond" +"toStartOfQuarter" +"toStartOfSecond" +"toStartOfTenMinutes" +"toStartOfWeek" +"toStartOfYear" +"toString" +"toStringCutToZero" +"toTime" +"toTimeZone" +"toTimezone" +"toTypeName" +"toUInt128" +"toUInt128OrDefault" +"toUInt128OrNull" +"toUInt128OrZero" +"toUInt16" +"toUInt16OrDefault" +"toUInt16OrNull" +"toUInt16OrZero" +"toUInt256" +"toUInt256OrDefault" +"toUInt256OrNull" +"toUInt256OrZero" +"toUInt32" +"toUInt32OrDefault" +"toUInt32OrNull" +"toUInt32OrZero" +"toUInt64" +"toUInt64OrDefault" +"toUInt64OrNull" +"toUInt64OrZero" +"toUInt8" +"toUInt8OrDefault" +"toUInt8OrNull" +"toUInt8OrZero" +"toUTCTimestamp" +"toUUID" +"toUUIDOrDefault" +"toUUIDOrNull" +"toUUIDOrZero" +"toUnixTimestamp" +"toUnixTimestamp64Micro" +"toUnixTimestamp64Milli" +"toUnixTimestamp64Nano" +"toValidUTF8" +"toWeek" +"toYYYYMM" +"toYYYYMMDD" +"toYYYYMMDDhhmmss" +"toYear" +"toYearWeek" +"to_utc_timestamp" +"today" +"tokens" "topK" "topKArgMax" "topKArgMin" @@ -4379,115 +4687,28 @@ "topKWeightedState" "topLevelDomain" "topLevelDomainRFC" -"toQuarter" -"toRelativeDayNum" -"toRelativeHourNum" -"toRelativeMinuteNum" -"toRelativeMonthNum" -"toRelativeQuarterNum" -"toRelativeSecondNum" -"toRelativeWeekNum" -"toRelativeYearNum" -"toSecond" -"TO SHARD" -"toStartOfDay" -"toStartOfFifteenMinutes" -"toStartOfFiveMinute" -"toStartOfFiveMinutes" -"toStartOfHour" -"toStartOfInterval" -"toStartOfISOYear" -"toStartOfMicrosecond" -"toStartOfMillisecond" -"toStartOfMinute" -"toStartOfMonth" -"toStartOfNanosecond" -"toStartOfQuarter" -"toStartOfSecond" -"toStartOfTenMinutes" -"toStartOfWeek" -"toStartOfYear" -"toString" -"toStringCutToZero" -"TO TABLE" -"TOTALS" -"toTime" -"toTimezone" -"toTimeZone" -"toTypeName" -"toUInt128" -"toUInt128OrDefault" -"toUInt128OrNull" -"toUInt128OrZero" -"toUInt16" -"toUInt16OrDefault" -"toUInt16OrNull" -"toUInt16OrZero" -"toUInt256" -"toUInt256OrDefault" -"toUInt256OrNull" -"toUInt256OrZero" -"toUInt32" -"toUInt32OrDefault" -"toUInt32OrNull" -"toUInt32OrZero" -"toUInt64" -"toUInt64OrDefault" -"toUInt64OrNull" -"toUInt64OrZero" -"toUInt8" -"toUInt8OrDefault" -"toUInt8OrNull" -"toUInt8OrZero" -"TO_UNIXTIME" -"toUnixTimestamp" -"toUnixTimestamp64Micro" -"toUnixTimestamp64Milli" -"toUnixTimestamp64Nano" -"to_utc_timestamp" -"toUTCTimestamp" -"toUUID" -"toUUIDOrDefault" -"toUUIDOrNull" -"toUUIDOrZero" -"toValidUTF8" -"TO VOLUME" -"toWeek" -"toYear" -"toYearWeek" -"toYYYYMM" -"toYYYYMMDD" -"toYYYYMMDDhhmmss" -"TRACKING ONLY" -"TRAILING" -"TRANSACTION" "transactionID" "transactionLatestSnapshot" "transactionOldestSnapshot" "transform" "translate" "translateUTF8" -"TRIGGER" "trim" -"TRIM" "trimBoth" "trimLeft" "trimRight" -"TRUE" "trunc" "truncate" -"TRUNCATE" "tryBase58Decode" "tryBase64Decode" +"tryBase64URLDecode" "tryDecrypt" "tryIdnaEncode" "tryPunycodeDecode" -"TTL" "tumble" "tumbleEnd" "tumbleStart" "tuple" -"Tuple" "tupleConcat" "tupleDivide" "tupleDivideByNumber" @@ -4502,25 +4723,13 @@ "tupleModuloByNumber" "tupleMultiply" "tupleMultiplyByNumber" +"tupleNames" "tupleNegate" "tuplePlus" "tupleToNameValuePairs" -"TYPE" -"TYPEOF" "ucase" -"UInt128" -"UInt16" -"UInt256" -"UInt32" -"UInt64" -"UInt8" -"ULIDStringToDateTime" "unbin" -"UNBOUNDED" -"UNDROP" -"UNFREEZE" "unhex" -"UNION" "uniq" "uniqArgMax" "uniqArgMin" @@ -4614,7 +4823,6 @@ "uniqThetaSimpleState" "uniqThetaState" "uniqThetaUnion" -"UNIQUE" "uniqUpTo" "uniqUpToArgMax" "uniqUpToArgMin" @@ -4630,61 +4838,25 @@ "uniqUpToResample" "uniqUpToSimpleState" "uniqUpToState" -"UNSET FAKE TIME" -"UNSIGNED" -"UPDATE" "upper" "upperUTF8" "uptime" -"URL" -"URLHash" -"URLHierarchy" -"URLPathHierarchy" -"USE" "user" -"USING" -"UTC_timestamp" -"UTCTimestamp" -"UUID" -"UUIDNumToString" -"UUIDStringToNum" "validateNestedArraySizes" -"VALID UNTIL" -"VALUES" -"VARBINARY" -"VARCHAR" -"VARCHAR2" -"Variant" -"variantElement" -"variantType" "varPop" -"VAR_POP" "varPopArgMax" -"VAR_POPArgMax" "varPopArgMin" -"VAR_POPArgMin" "varPopArray" -"VAR_POPArray" "varPopDistinct" -"VAR_POPDistinct" "varPopForEach" -"VAR_POPForEach" "varPopIf" -"VAR_POPIf" "varPopMap" -"VAR_POPMap" "varPopMerge" -"VAR_POPMerge" "varPopNull" -"VAR_POPNull" "varPopOrDefault" -"VAR_POPOrDefault" "varPopOrNull" -"VAR_POPOrNull" "varPopResample" -"VAR_POPResample" "varPopSimpleState" -"VAR_POPSimpleState" "varPopStable" "varPopStableArgMax" "varPopStableArgMin" @@ -4701,35 +4873,20 @@ "varPopStableSimpleState" "varPopStableState" "varPopState" -"VAR_POPState" "varSamp" -"VAR_SAMP" "varSampArgMax" -"VAR_SAMPArgMax" "varSampArgMin" -"VAR_SAMPArgMin" "varSampArray" -"VAR_SAMPArray" "varSampDistinct" -"VAR_SAMPDistinct" "varSampForEach" -"VAR_SAMPForEach" "varSampIf" -"VAR_SAMPIf" "varSampMap" -"VAR_SAMPMap" "varSampMerge" -"VAR_SAMPMerge" "varSampNull" -"VAR_SAMPNull" "varSampOrDefault" -"VAR_SAMPOrDefault" "varSampOrNull" -"VAR_SAMPOrNull" "varSampResample" -"VAR_SAMPResample" "varSampSimpleState" -"VAR_SAMPSimpleState" "varSampStable" "varSampStableArgMax" "varSampStableArgMin" @@ -4746,13 +4903,11 @@ "varSampStableSimpleState" "varSampStableState" "varSampState" -"VAR_SAMPState" -"VARYING" +"variantElement" +"variantType" "vectorDifference" "vectorSum" "version" -"VIEW" -"VISIBLE" "visibleWidth" "visitParamExtractBool" "visitParamExtractFloat" @@ -4761,11 +4916,7 @@ "visitParamExtractString" "visitParamExtractUInt" "visitParamHas" -"WATCH" -"WATERMARK" "week" -"WEEK" -"WEEKS" "welchTTest" "welchTTestArgMax" "welchTTestArgMin" @@ -4781,11 +4932,8 @@ "welchTTestResample" "welchTTestSimpleState" "welchTTestState" -"WHEN" -"WHERE" -"width_bucket" "widthBucket" -"WINDOW" +"width_bucket" "windowFunnel" "windowFunnelArgMax" "windowFunnelArgMin" @@ -4802,16 +4950,6 @@ "windowFunnelSimpleState" "windowFunnelState" "windowID" -"WITH" -"WITH ADMIN OPTION" -"WITH CHECK" -"WITH FILL" -"WITH GRANT OPTION" -"with_itemindex" -"WITH NAME" -"WITH REPLACE OPTION" -"WITH TIES" -"WK" "wkt" "wordShingleMinHash" "wordShingleMinHashArg" @@ -4825,23 +4963,12 @@ "wordShingleSimHashCaseInsensitive" "wordShingleSimHashCaseInsensitiveUTF8" "wordShingleSimHashUTF8" -"WRITABLE" -"WW" "wyHash64" "xor" -"xxh3" "xxHash32" "xxHash64" +"xxh3" "yandexConsistentHash" -"YEAR" -"YEARS" "yearweek" "yesterday" -"YY" -"YYYY" -"YYYYMMDDhhmmssToDateTime" -"YYYYMMDDhhmmssToDateTime64" -"YYYYMMDDToDate" -"YYYYMMDDToDate32" -"ZKPATH" "zookeeperSessionUptime" diff --git a/tests/fuzz/dictionaries/datatypes.dict b/tests/fuzz/dictionaries/datatypes.dict index a01a94fd3e3..797905203b2 100644 --- a/tests/fuzz/dictionaries/datatypes.dict +++ b/tests/fuzz/dictionaries/datatypes.dict @@ -31,6 +31,7 @@ "Decimal256" "Decimal32" "Decimal64" +"Dynamic" "ENUM" "Enum" "Enum16" @@ -74,6 +75,7 @@ "JSON" "LONGBLOB" "LONGTEXT" +"LineString" "LowCardinality" "MEDIUMBLOB" "MEDIUMINT" @@ -81,6 +83,7 @@ "MEDIUMINT UNSIGNED" "MEDIUMTEXT" "Map" +"MultiLineString" "MultiPolygon" "NATIONAL CHAR" "NATIONAL CHAR VARYING" @@ -132,4 +135,3 @@ "YEAR" "bool" "boolean" -"Dynamic" diff --git a/tests/fuzz/dictionaries/functions.dict b/tests/fuzz/dictionaries/functions.dict index 6f2a88c22fa..e562595fb67 100644 --- a/tests/fuzz/dictionaries/functions.dict +++ b/tests/fuzz/dictionaries/functions.dict @@ -126,6 +126,7 @@ "JSONHas" "JSONKey" "JSONLength" +"JSONMergePatch" "JSONType" "JSON_ARRAY_LENGTH" "JSON_EXISTS" @@ -227,6 +228,8 @@ "UTC_timestamp" "UUIDNumToString" "UUIDStringToNum" +"UUIDToNum" +"UUIDv7ToDateTime" "VAR_POP" "VAR_POPArgMax" "VAR_POPArgMin" @@ -263,6 +266,7 @@ "YYYYMMDDhhmmssToDateTime" "YYYYMMDDhhmmssToDateTime64" "_CAST" +"__actionName" "__bitBoolMaskAnd" "__bitBoolMaskOr" "__bitSwapLastTwo" @@ -660,6 +664,8 @@ "base58Encode" "base64Decode" "base64Encode" +"base64URLDecode" +"base64URLEncode" "basename" "bin" "bitAnd" @@ -744,8 +750,15 @@ "cbrt" "ceil" "ceiling" +"changeDay" +"changeHour" +"changeMinute" +"changeMonth" +"changeSecond" +"changeYear" "char" "cityHash64" +"clamp" "coalesce" "concat" "concatAssumeInjective" @@ -970,6 +983,7 @@ "current_date" "current_schemas" "current_timestamp" +"current_user" "cutFragment" "cutIPv6" "cutQueryString" @@ -988,7 +1002,9 @@ "dateDiff" "dateName" "dateTime64ToSnowflake" +"dateTime64ToSnowflakeID" "dateTimeToSnowflake" +"dateTimeToSnowflakeID" "dateTrunc" "date_diff" "decodeHTMLComponent" @@ -1032,6 +1048,21 @@ "deltaSumTimestampSimpleState" "deltaSumTimestampState" "demangle" +"denseRank" +"denseRankArgMax" +"denseRankArgMin" +"denseRankArray" +"denseRankDistinct" +"denseRankForEach" +"denseRankIf" +"denseRankMap" +"denseRankMerge" +"denseRankNull" +"denseRankOrDefault" +"denseRankOrNull" +"denseRankResample" +"denseRankSimpleState" +"denseRankState" "dense_rank" "dense_rankArgMax" "dense_rankArgMin" @@ -1108,8 +1139,11 @@ "domainWithoutWWWRFC" "dotProduct" "dumpColumnStructure" +"dynamicElement" +"dynamicType" "e" "editDistance" +"editDistanceUTF8" "empty" "emptyArrayDate" "emptyArrayDateTime" @@ -1334,14 +1368,17 @@ "gccMurmurHash" "gcd" "generateRandomStructure" +"generateSnowflakeID" "generateULID" "generateUUIDv4" +"generateUUIDv7" "geoDistance" "geoToH3" "geoToS2" "geohashDecode" "geohashEncode" "geohashesInBox" +"getClientHTTPHeader" "getMacro" "getOSKernelVersion" "getServerPort" @@ -1589,6 +1626,20 @@ "groupBitmapXorSimpleState" "groupBitmapXorState" "groupConcat" +"groupConcatArgMax" +"groupConcatArgMin" +"groupConcatArray" +"groupConcatDistinct" +"groupConcatForEach" +"groupConcatIf" +"groupConcatMap" +"groupConcatMerge" +"groupConcatNull" +"groupConcatOrDefault" +"groupConcatOrNull" +"groupConcatResample" +"groupConcatSimpleState" +"groupConcatState" "groupUniqArray" "groupUniqArrayArgMax" "groupUniqArrayArgMin" @@ -1604,6 +1655,21 @@ "groupUniqArrayResample" "groupUniqArraySimpleState" "groupUniqArrayState" +"group_concat" +"group_concatArgMax" +"group_concatArgMin" +"group_concatArray" +"group_concatDistinct" +"group_concatForEach" +"group_concatIf" +"group_concatMap" +"group_concatMerge" +"group_concatNull" +"group_concatOrDefault" +"group_concatOrNull" +"group_concatResample" +"group_concatSimpleState" +"group_concatState" "h3CellAreaM2" "h3CellAreaRads2" "h3Distance" @@ -1660,6 +1726,8 @@ "hasTokenCaseInsensitiveOrNull" "hasTokenOrNull" "hex" +"hilbertDecode" +"hilbertEncode" "histogram" "histogramArgMax" "histogramArgMin" @@ -1881,6 +1949,7 @@ "less" "lessOrEquals" "levenshteinDistance" +"levenshteinDistanceUTF8" "lgamma" "like" "ln" @@ -2498,10 +2567,44 @@ "parseDateTimeInJodaSyntaxOrZero" "parseDateTimeOrNull" "parseDateTimeOrZero" +"parseReadableSize" +"parseReadableSizeOrNull" +"parseReadableSizeOrZero" "parseTimeDelta" +"partitionID" "partitionId" "path" "pathFull" +"percentRank" +"percentRankArgMax" +"percentRankArgMin" +"percentRankArray" +"percentRankDistinct" +"percentRankForEach" +"percentRankIf" +"percentRankMap" +"percentRankMerge" +"percentRankNull" +"percentRankOrDefault" +"percentRankOrNull" +"percentRankResample" +"percentRankSimpleState" +"percentRankState" +"percent_rank" +"percent_rankArgMax" +"percent_rankArgMin" +"percent_rankArray" +"percent_rankDistinct" +"percent_rankForEach" +"percent_rankIf" +"percent_rankMap" +"percent_rankMerge" +"percent_rankNull" +"percent_rankOrDefault" +"percent_rankOrNull" +"percent_rankResample" +"percent_rankSimpleState" +"percent_rankState" "pi" "plus" "pmod" @@ -2533,6 +2636,7 @@ "positive_modulo" "pow" "power" +"printf" "proportionsZTest" "protocol" "punycodeDecode" @@ -3103,6 +3207,8 @@ "rankResample" "rankSimpleState" "rankState" +"readWKTLineString" +"readWKTMultiLineString" "readWKTMultiPolygon" "readWKTPoint" "readWKTPolygon" @@ -3340,6 +3446,8 @@ "skewSampState" "sleep" "sleepEachRow" +"snowflakeIDToDateTime" +"snowflakeIDToDateTime64" "snowflakeToDateTime" "snowflakeToDateTime64" "soundex" @@ -3902,6 +4010,7 @@ "truncate" "tryBase58Decode" "tryBase64Decode" +"tryBase64URLDecode" "tryDecrypt" "tryIdnaEncode" "tryPunycodeDecode" @@ -3923,6 +4032,7 @@ "tupleModuloByNumber" "tupleMultiply" "tupleMultiplyByNumber" +"tupleNames" "tupleNegate" "tuplePlus" "tupleToNameValuePairs" diff --git a/tests/fuzz/dictionaries/key_words.dict b/tests/fuzz/dictionaries/keywords.dict similarity index 95% rename from tests/fuzz/dictionaries/key_words.dict rename to tests/fuzz/dictionaries/keywords.dict index db517a2382c..abaaf9e53b5 100644 --- a/tests/fuzz/dictionaries/key_words.dict +++ b/tests/fuzz/dictionaries/keywords.dict @@ -3,7 +3,7 @@ "ADD CONSTRAINT" "ADD INDEX" "ADD PROJECTION" -"ADD STATISTIC" +"ADD STATISTICS" "ADMIN OPTION FOR" "AFTER" "ALGORITHM" @@ -76,7 +76,7 @@ "CLEAR COLUMN" "CLEAR INDEX" "CLEAR PROJECTION" -"CLEAR STATISTIC" +"CLEAR STATISTICS" "CLUSTER" "CLUSTERS" "CN" @@ -110,6 +110,8 @@ "CURRENTUSER" "CURRENT_USER" "D" +"DATA" +"DATA INNER UUID" "DATABASE" "DATABASES" "DATE" @@ -147,7 +149,7 @@ "DROP PART" "DROP PARTITION" "DROP PROJECTION" -"DROP STATISTIC" +"DROP STATISTICS" "DROP TABLE" "DROP TEMPORARY TABLE" "ELSE" @@ -247,6 +249,7 @@ "IS NULL" "IS_OBJECT_ID" "JOIN" +"JWT" "KERBEROS" "KEY" "KEY BY" @@ -277,13 +280,15 @@ "MATERIALIZE COLUMN" "MATERIALIZE INDEX" "MATERIALIZE PROJECTION" -"MATERIALIZE STATISTIC" +"MATERIALIZE STATISTICS" "MATERIALIZE TTL" "MATERIALIZED" "MAX" "MCS" "MEMORY" "MERGES" +"METRICS" +"METRICS INNER UUID" "MI" "MICROSECOND" "MICROSECONDS" @@ -297,12 +302,14 @@ "MODIFY" "MODIFY COLUMN" "MODIFY COMMENT" +"MODIFY DEFINER" "MODIFY ORDER BY" "MODIFY QUERY" "MODIFY REFRESH" "MODIFY SAMPLE BY" "MODIFY SETTING" "MODIFY SQL SECURITY" +"MODIFY STATISTICS" "MODIFY TTL" "MONTH" "MONTHS" @@ -373,6 +380,7 @@ "Protobuf" "Q" "QQ" +"QUALIFY" "QUARTER" "QUARTERS" "QUERY" @@ -384,6 +392,7 @@ "READONLY" "REALM" "RECOMPRESS" +"RECURSIVE" "REFERENCES" "REFRESH" "REGEXP" @@ -415,6 +424,7 @@ "SALT" "SAMPLE" "SAMPLE BY" +"SAN" "SCHEME" "SECOND" "SECONDS" @@ -460,7 +470,8 @@ "SS" "SSH_KEY" "SSL_CERTIFICATE" -"STATISTIC" +"START TRANSACTION" +"STATISTICS" "STEP" "STORAGE" "STRICT" @@ -475,6 +486,8 @@ "TABLE" "TABLE OVERRIDE" "TABLES" +"TAGS" +"TAGS INNER UUID" "TEMPORARY" "TEMPORARY TABLE" "TEST" @@ -529,6 +542,7 @@ "WITH NAME" "WITH REPLACE OPTION" "WITH TIES" +"WITH_ITEMINDEX" "WK" "WRITABLE" "WW" @@ -540,4 +554,3 @@ "bagexpansion" "base_backup" "cluster_host_ids" -"with_itemindex" diff --git a/utils/libfuzzer/runner.py b/tests/fuzz/runner.py similarity index 75% rename from utils/libfuzzer/runner.py rename to tests/fuzz/runner.py index bbe648dbbc2..44259228f60 100644 --- a/utils/libfuzzer/runner.py +++ b/tests/fuzz/runner.py @@ -11,7 +11,7 @@ FUZZER_ARGS = os.getenv("FUZZER_ARGS", "") def run_fuzzer(fuzzer: str): - logging.info(f"Running fuzzer {fuzzer}...") + logging.info("Running fuzzer %s...", fuzzer) corpus_dir = f"{fuzzer}.in" with Path(corpus_dir) as path: @@ -29,28 +29,27 @@ def run_fuzzer(fuzzer: str): if parser.has_section("asan"): os.environ["ASAN_OPTIONS"] = ( - f"{os.environ['ASAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['asan'].items())}" + f"{os.environ['ASAN_OPTIONS']}:{':'.join(f'{key}={value}' for key, value in parser['asan'].items())}" ) if parser.has_section("msan"): os.environ["MSAN_OPTIONS"] = ( - f"{os.environ['MSAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['msan'].items())}" + f"{os.environ['MSAN_OPTIONS']}:{':'.join(f'{key}={value}' for key, value in parser['msan'].items())}" ) if parser.has_section("ubsan"): os.environ["UBSAN_OPTIONS"] = ( - f"{os.environ['UBSAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['ubsan'].items())}" + f"{os.environ['UBSAN_OPTIONS']}:{':'.join(f'{key}={value}' for key, value in parser['ubsan'].items())}" ) if parser.has_section("libfuzzer"): custom_libfuzzer_options = " ".join( - "-%s=%s" % (key, value) - for key, value in parser["libfuzzer"].items() + f"-{key}={value}" for key, value in parser["libfuzzer"].items() ) if parser.has_section("fuzzer_arguments"): fuzzer_arguments = " ".join( - ("%s" % key) if value == "" else ("%s=%s" % (key, value)) + (f"{key}") if value == "" else (f"{key}={value}") for key, value in parser["fuzzer_arguments"].items() ) @@ -65,7 +64,7 @@ def run_fuzzer(fuzzer: str): cmd_line += " < /dev/null" - logging.info(f"...will execute: {cmd_line}") + logging.info("...will execute: %s", cmd_line) subprocess.check_call(cmd_line, shell=True) diff --git a/tests/fuzz/tcp_protocol_fuzzer.options b/tests/fuzz/tcp_protocol_fuzzer.options new file mode 100644 index 00000000000..4885669d91d --- /dev/null +++ b/tests/fuzz/tcp_protocol_fuzzer.options @@ -0,0 +1,4 @@ +[fuzzer_arguments] +--log-file=tcp_protocol_fuzzer.log +--= +--logging.terminal=0 diff --git a/tests/fuzz/update_dict.sh b/tests/fuzz/update_dict.sh new file mode 100755 index 00000000000..a83c9167129 --- /dev/null +++ b/tests/fuzz/update_dict.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +set -euo pipefail + +SCRIPT_DIR=$(dirname "$(realpath "$0")") +ROOT_PATH="$(git rev-parse --show-toplevel)" +CLICKHOUSE_BIN="${CLICKHOUSE_BIN:-$ROOT_PATH/build/programs/clickhouse}" +DICTIONARIES_DIR="$SCRIPT_DIR/dictionaries" + +echo "Generating functions dict" +$CLICKHOUSE_BIN local -q "SELECT * FROM (SELECT DISTINCT concat('\"', name, '\"') as res FROM system.functions ORDER BY name UNION ALL SELECT concat('\"', a.name, b.name, '\"') as res FROM system.functions as a CROSS JOIN system.aggregate_function_combinators as b WHERE a.is_aggregate = 1) ORDER BY res" > "$DICTIONARIES_DIR/functions.dict" + +echo "Generating data types dict" +$CLICKHOUSE_BIN local -q "SELECT DISTINCT concat('\"', name, '\"') as res FROM system.data_type_families ORDER BY name" > "$DICTIONARIES_DIR/datatypes.dict" + +echo "Generating keywords dict" +$CLICKHOUSE_BIN local -q "SELECT DISTINCT concat('\"', keyword, '\"') as res FROM system.keywords ORDER BY keyword" > "$DICTIONARIES_DIR/keywords.dict" + +echo "Merging dictionaries into all.dict" +cat "$DICTIONARIES_DIR"/* | LC_ALL=C sort | uniq > "$SCRIPT_DIR/all.dict" \ No newline at end of file diff --git a/tests/integration/compose/docker_compose_prometheus.yml b/tests/integration/compose/docker_compose_prometheus.yml new file mode 100644 index 00000000000..0a1db2138ba --- /dev/null +++ b/tests/integration/compose/docker_compose_prometheus.yml @@ -0,0 +1,57 @@ +version: '2.3' +services: + prometheus_writer: + image: prom/prometheus:v2.50.1 + hostname: ${PROMETHEUS_WRITER_HOSTNAME:-prometheus_writer} + restart: always + entrypoint: | + /bin/sh -c 'truncate -s 0 /etc/prometheus/prometheus.yml + cat << EOF >> /etc/prometheus/prometheus.yml + global: + scrape_interval: 1s + scrape_configs: + - job_name: "prometheus" + static_configs: + - targets: ["localhost:${PROMETHEUS_WRITER_PORT}"] + EOF + if [ -n "${PROMETHEUS_REMOTE_WRITE_HANDLER}" ]; then + echo "remote_write:" >> /etc/prometheus/prometheus.yml + echo " - url: \"${PROMETHEUS_REMOTE_WRITE_HANDLER}\"" >> /etc/prometheus/prometheus.yml + fi + #cat "/etc/prometheus/prometheus.yml" + /bin/prometheus --config.file="/etc/prometheus/prometheus.yml" --storage.tsdb.path="/prometheus" --web.console.libraries="/usr/share/prometheus/console_libraries" --web.console.templates="/usr/share/prometheus/consoles" --web.listen-address="0.0.0.0:${PROMETHEUS_WRITER_PORT}" &> /var/log/prometheus/prometheus.log' + expose: + - ${PROMETHEUS_WRITER_PORT} + healthcheck: + test: curl -f "ttps://localhost:${PROMETHEUS_WRITER_PORT}/api/v1/status/runtimeinfo" || exit 1 + interval: 5s + timeout: 3s + retries: 30 + volumes: + - type: ${PROMETHEUS_WRITER_LOGS_FS:-tmpfs} + source: ${PROMETHEUS_WRITER_LOGS:-} + target: /var/log/prometheus + + prometheus_reader: + image: prom/prometheus:v2.50.1 + hostname: ${PROMETHEUS_READER_HOSTNAME:-prometheus_reader} + restart: always + entrypoint: | + /bin/sh -c 'truncate -s 0 /etc/prometheus/prometheus.yml + if [ -n "${PROMETHEUS_REMOTE_READ_HANDLER}" ]; then + echo "remote_read:" >> /etc/prometheus/prometheus.yml + echo " - url: \"${PROMETHEUS_REMOTE_READ_HANDLER}\"" >> /etc/prometheus/prometheus.yml + fi + #cat "/etc/prometheus/prometheus.yml" + /bin/prometheus --config.file="/etc/prometheus/prometheus.yml" --storage.tsdb.path="/prometheus" --web.console.libraries="/usr/share/prometheus/console_libraries" --web.console.templates="/usr/share/prometheus/consoles" --web.listen-address="0.0.0.0:${PROMETHEUS_READER_PORT}" &> /var/log/prometheus/prometheus.log' + expose: + - ${PROMETHEUS_READER_PORT} + healthcheck: + test: curl -f "ttps://localhost:${PROMETHEUS_READER_PORT}/api/v1/status/runtimeinfo" || exit 1 + interval: 5s + timeout: 3s + retries: 30 + volumes: + - type: ${PROMETHEUS_READER_LOGS_FS:-tmpfs} + source: ${PROMETHEUS_READER_LOGS:-} + target: /var/log/prometheus diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 5e0352df617..53f4f1e1f26 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -570,6 +570,8 @@ class ClickHouseCluster: self.spark_session = None self.with_azurite = False + self.azurite_container = "azurite-container" + self.blob_service_client = None self._azurite_port = 0 # available when with_hdfs == True @@ -737,6 +739,25 @@ class ClickHouseCluster: self.jdbc_driver_dir = p.abspath(p.join(self.instances_dir, "jdbc_driver")) self.jdbc_driver_logs_dir = os.path.join(self.jdbc_driver_dir, "logs") + # available when with_prometheus == True + self.with_prometheus = False + self.prometheus_writer_host = "prometheus_writer" + self.prometheus_writer_port = 9090 + self.prometheus_writer_logs_dir = p.abspath( + p.join(self.instances_dir, "prometheus_writer/logs") + ) + self.prometheus_reader_host = "prometheus_reader" + self.prometheus_reader_port = 9091 + self.prometheus_reader_logs_dir = p.abspath( + p.join(self.instances_dir, "prometheus_reader/logs") + ) + self.prometheus_remote_write_handler_host = None + self.prometheus_remote_write_handler_port = 9092 + self.prometheus_remote_write_handler_path = "/write" + self.prometheus_remote_read_handler_host = None + self.prometheus_remote_read_handler_port = 9092 + self.prometheus_remote_read_handler_path = "/read" + self.docker_client = None self.is_up = False self.env = os.environ.copy() @@ -1619,6 +1640,42 @@ class ClickHouseCluster: ] return self.base_hive_cmd + def setup_prometheus_cmd(self, instance, env_variables, docker_compose_yml_dir): + env_variables["PROMETHEUS_WRITER_HOST"] = self.prometheus_writer_host + env_variables["PROMETHEUS_WRITER_PORT"] = str(self.prometheus_writer_port) + env_variables["PROMETHEUS_WRITER_LOGS"] = self.prometheus_writer_logs_dir + env_variables["PROMETHEUS_WRITER_LOGS_FS"] = "bind" + env_variables["PROMETHEUS_READER_HOST"] = self.prometheus_reader_host + env_variables["PROMETHEUS_READER_PORT"] = str(self.prometheus_reader_port) + env_variables["PROMETHEUS_READER_LOGS"] = self.prometheus_reader_logs_dir + env_variables["PROMETHEUS_READER_LOGS_FS"] = "bind" + if self.prometheus_remote_write_handler_host: + env_variables["PROMETHEUS_REMOTE_WRITE_HANDLER"] = ( + f"http://{self.prometheus_remote_write_handler_host}:{self.prometheus_remote_write_handler_port}/{self.prometheus_remote_write_handler_path.strip('/')}" + ) + if self.prometheus_remote_read_handler_host: + env_variables["PROMETHEUS_REMOTE_READ_HANDLER"] = ( + f"http://{self.prometheus_remote_read_handler_host}:{self.prometheus_remote_read_handler_port}/{self.prometheus_remote_read_handler_path.strip('/')}" + ) + if not self.with_prometheus: + self.with_prometheus = True + self.base_cmd.extend( + [ + "--file", + p.join(docker_compose_yml_dir, "docker_compose_prometheus.yml"), + ] + ) + self.base_prometheus_cmd = [ + "docker-compose", + "--env-file", + instance.env_file, + "--project-name", + self.project_name, + "--file", + p.join(docker_compose_yml_dir, "docker_compose_prometheus.yml"), + ] + return self.base_prometheus_cmd + def add_instance( self, name, @@ -1659,6 +1716,9 @@ class ClickHouseCluster: with_jdbc_bridge=False, with_hive=False, with_coredns=False, + with_prometheus=False, + handle_prometheus_remote_write=False, + handle_prometheus_remote_read=False, use_old_analyzer=None, hostname=None, env_variables=None, @@ -2001,6 +2061,17 @@ class ClickHouseCluster: self.setup_hive(instance, env_variables, docker_compose_yml_dir) ) + if with_prometheus: + if handle_prometheus_remote_write: + self.prometheus_remote_write_handler_host = instance.hostname + if handle_prometheus_remote_read: + self.prometheus_remote_read_handler_host = instance.hostname + cmds.append( + self.setup_prometheus_cmd( + instance, env_variables, docker_compose_yml_dir + ) + ) + logging.debug( "Cluster name:{} project_name:{}. Added instance name:{} tag:{} base_cmd:{} docker_compose_yml_dir:{}".format( self.name, @@ -2623,6 +2694,32 @@ class ClickHouseCluster: connection_string ) logging.debug(blob_service_client.get_account_information()) + containers = [ + c + for c in blob_service_client.list_containers( + name_starts_with=self.azurite_container + ) + if c.name == self.azurite_container + ] + if len(containers) > 0: + for c in containers: + blob_service_client.delete_container(c) + + container_client = blob_service_client.get_container_client( + self.azurite_container + ) + if container_client.exists(): + logging.debug( + f"azurite container '{self.azurite_container}' exist, deleting all blobs" + ) + for b in container_client.list_blobs(): + container_client.delete_blob(b.name) + else: + logging.debug( + f"azurite container '{self.azurite_container}' doesn't exist, creating it" + ) + container_client.create_container() + self.blob_service_client = blob_service_client return except Exception as ex: @@ -3064,6 +3161,12 @@ class ClickHouseCluster: f"http://{self.jdbc_bridge_ip}:{self.jdbc_bridge_port}/ping" ) + if self.with_prometheus: + os.makedirs(self.prometheus_writer_logs_dir) + os.chmod(self.prometheus_writer_logs_dir, stat.S_IRWXU | stat.S_IRWXO) + os.makedirs(self.prometheus_reader_logs_dir) + os.chmod(self.prometheus_reader_logs_dir, stat.S_IRWXU | stat.S_IRWXO) + clickhouse_start_cmd = self.base_cmd + ["up", "-d", "--no-recreate"] logging.debug( ( diff --git a/tests/integration/parallel_skip.json b/tests/integration/parallel_skip.json index 99fa626bd1e..507894534d4 100644 --- a/tests/integration/parallel_skip.json +++ b/tests/integration/parallel_skip.json @@ -1,6 +1,7 @@ [ "test_dns_cache/test.py::test_dns_cache_update", "test_dns_cache/test.py::test_ip_change_drop_dns_cache", + "test_dns_cache/test.py::test_dns_resolver_filter", "test_dns_cache/test.py::test_ip_change_update_dns_cache", "test_dns_cache/test.py::test_user_access_ip_change[node0]", "test_dns_cache/test.py::test_user_access_ip_change[node1]", @@ -162,9 +163,13 @@ "test_storage_kafka/test.py::test_system_kafka_consumers_rebalance_mv", "test_storage_kafka/test.py::test_formats_errors", "test_storage_kafka/test.py::test_multiple_read_in_materialized_views", + "test_storage_kafka/test.py::test_kafka_null_message", + + "test_storage_kafka/test_produce_http_interface.py::test_kafka_produce_http_interface_row_based_format", "test_storage_kerberized_kafka/test.py::test_kafka_json_as_string", "test_storage_kerberized_kafka/test.py::test_kafka_json_as_string_request_new_ticket_after_expiration", "test_storage_kerberized_kafka/test.py::test_kafka_json_as_string_no_kdc", "test_storage_kerberized_kafka/test.py::test_kafka_config_from_sql_named_collection" + ] diff --git a/tests/queries/0_stateless/02597_column_update_tricy_expression_and_replication.reference b/tests/integration/test_aliases_in_default_expr_not_break_table_structure/__init__.py similarity index 100% rename from tests/queries/0_stateless/02597_column_update_tricy_expression_and_replication.reference rename to tests/integration/test_aliases_in_default_expr_not_break_table_structure/__init__.py diff --git a/tests/integration/test_aliases_in_default_expr_not_break_table_structure/config/enable_keeper.xml b/tests/integration/test_aliases_in_default_expr_not_break_table_structure/config/enable_keeper.xml new file mode 100644 index 00000000000..4ca4f604ec3 --- /dev/null +++ b/tests/integration/test_aliases_in_default_expr_not_break_table_structure/config/enable_keeper.xml @@ -0,0 +1,26 @@ + + + 2181 + 1 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + 20000 + + + + 1 + localhost + 9444 + + + + + + + localhost + 2181 + + 20000 + + \ No newline at end of file diff --git a/tests/integration/test_aliases_in_default_expr_not_break_table_structure/config/users.xml b/tests/integration/test_aliases_in_default_expr_not_break_table_structure/config/users.xml new file mode 100644 index 00000000000..c5de0b6819c --- /dev/null +++ b/tests/integration/test_aliases_in_default_expr_not_break_table_structure/config/users.xml @@ -0,0 +1,8 @@ + + + + default + + + + \ No newline at end of file diff --git a/tests/integration/test_aliases_in_default_expr_not_break_table_structure/test.py b/tests/integration/test_aliases_in_default_expr_not_break_table_structure/test.py new file mode 100644 index 00000000000..e0c15e18c23 --- /dev/null +++ b/tests/integration/test_aliases_in_default_expr_not_break_table_structure/test.py @@ -0,0 +1,71 @@ +import pytest +import random +import string + +from helpers.cluster import ClickHouseCluster + + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance( + "node", + main_configs=[ + "config/enable_keeper.xml", + "config/users.xml", + ], + stay_alive=True, + with_minio=True, + macros={"shard": 1, "replica": 1}, +) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def randomize_table_name(table_name, random_suffix_length=10): + letters = string.ascii_letters + string.digits + return f"{table_name}_{''.join(random.choice(letters) for _ in range(random_suffix_length))}" + + +@pytest.mark.parametrize("engine", ["ReplicatedMergeTree"]) +def test_aliases_in_default_expr_not_break_table_structure(start_cluster, engine): + """ + Making sure that using aliases in columns' default expressions does not lead to having different columns metadata in ZooKeeper and on disk. + Issue: https://github.com/ClickHouse/clickhouse-private/issues/5150 + """ + + data = '{"event": {"col1-key": "col1-val", "col2-key": "col2-val"}}' + + table_name = randomize_table_name("t") + + node.query( + f""" + DROP TABLE IF EXISTS {table_name}; + CREATE TABLE {table_name} + ( + `data` String, + `col1` String DEFAULT JSONExtractString(JSONExtractString(data, 'event') AS event, 'col1-key'), + `col2` String MATERIALIZED JSONExtractString(JSONExtractString(data, 'event') AS event, 'col2-key') + ) + ENGINE = {engine}('/test/{table_name}', '{{replica}}') + ORDER BY col1 + """ + ) + + node.restart_clickhouse() + + node.query( + f""" + INSERT INTO {table_name} (data) VALUES ('{data}'); + """ + ) + assert node.query(f"SELECT data FROM {table_name}").strip() == data + assert node.query(f"SELECT col1 FROM {table_name}").strip() == "col1-val" + assert node.query(f"SELECT col2 FROM {table_name}").strip() == "col2-val" + + node.query(f"DROP TABLE {table_name}") diff --git a/tests/queries/0_stateless/02864_statistics_exception.reference b/tests/integration/test_async_metrics_in_cgroup/__init__.py similarity index 100% rename from tests/queries/0_stateless/02864_statistics_exception.reference rename to tests/integration/test_async_metrics_in_cgroup/__init__.py diff --git a/tests/integration/test_async_metrics_in_cgroup/test.py b/tests/integration/test_async_metrics_in_cgroup/test.py new file mode 100644 index 00000000000..d9f2e3aaaed --- /dev/null +++ b/tests/integration/test_async_metrics_in_cgroup/test.py @@ -0,0 +1,69 @@ +import pytest + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance("node1", stay_alive=True) +node2 = cluster.add_instance("node2", stay_alive=True) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def run_cpu_intensive_task(node): + node.query( + "SELECT sum(*) FROM system.numbers_mt FORMAT Null SETTINGS max_execution_time=10", + ignore_error=True, + ) + + +def get_async_metric(node, metric): + node.query("SYSTEM FLUSH LOGS") + return node.query( + f""" + SELECT max(value) + FROM ( + SELECT toStartOfInterval(event_time, toIntervalSecond(1)) AS t, avg(value) AS value + FROM system.asynchronous_metric_log + WHERE event_time >= now() - 60 AND metric = '{metric}' + GROUP BY t + ) + SETTINGS max_threads = 1 + """ + ).strip("\n") + + +def test_user_cpu_accounting(start_cluster): + if node1.is_built_with_sanitizer(): + pytest.skip("Disabled for sanitizers") + + # run query on the other node, its usage shouldn't be accounted by node1 + run_cpu_intensive_task(node2) + + node1_cpu_time = get_async_metric(node1, "OSUserTime") + assert float(node1_cpu_time) < 2 + + # then let's test that we will account cpu time spent by the server itself + node2_cpu_time = get_async_metric(node2, "OSUserTime") + # this check is really weak, but CI is tough place and we cannot guarantee that test process will get many cpu time + assert float(node2_cpu_time) > 2 + + +def test_normalized_user_cpu(start_cluster): + if node1.is_built_with_sanitizer(): + pytest.skip("Disabled for sanitizers") + + # run query on the other node, its usage shouldn't be accounted by node1 + run_cpu_intensive_task(node2) + + node1_cpu_time = get_async_metric(node1, "OSUserTimeNormalized") + assert float(node1_cpu_time) < 1.01 + + node2_cpu_time = get_async_metric(node2, "OSUserTimeNormalized") + assert float(node2_cpu_time) < 1.01 diff --git a/tests/integration/test_broken_projections/config.d/dont_start_broken.xml b/tests/integration/test_broken_projections/config.d/dont_start_broken.xml new file mode 100644 index 00000000000..9603cdc7e3e --- /dev/null +++ b/tests/integration/test_broken_projections/config.d/dont_start_broken.xml @@ -0,0 +1,6 @@ + + + + 0 + + diff --git a/tests/integration/test_broken_projections/test.py b/tests/integration/test_broken_projections/test.py index 162c0dbaa2f..578ff42369c 100644 --- a/tests/integration/test_broken_projections/test.py +++ b/tests/integration/test_broken_projections/test.py @@ -4,6 +4,7 @@ import logging import string import random from helpers.cluster import ClickHouseCluster +from multiprocessing.dummy import Pool cluster = ClickHouseCluster(__file__) @@ -18,6 +19,12 @@ def cluster(): stay_alive=True, with_zookeeper=True, ) + cluster.add_instance( + "node_restart", + main_configs=["config.d/dont_start_broken.xml"], + stay_alive=True, + with_zookeeper=True, + ) logging.info("Starting cluster...") cluster.start() @@ -632,6 +639,49 @@ def test_broken_on_start(cluster): check(node, table_name, 0) +def test_disappeared_projection_on_start(cluster): + node = cluster.instances["node_restart"] + + table_name = "test_disapperead_projection" + create_table(node, table_name, 1) + + node.query(f"SYSTEM STOP MERGES {table_name}") + + insert(node, table_name, 0, 5) + insert(node, table_name, 5, 5) + insert(node, table_name, 10, 5) + insert(node, table_name, 15, 5) + + assert ["all_0_0_0", "all_1_1_0", "all_2_2_0", "all_3_3_0"] == get_parts( + node, table_name + ) + + def drop_projection(): + node.query( + f"ALTER TABLE {table_name} DROP PROJECTION proj2", + settings={"mutations_sync": "0"}, + ) + + p = Pool(2) + p.apply_async(drop_projection) + + for i in range(30): + create_query = node.query(f"SHOW CREATE TABLE {table_name}") + if "proj2" not in create_query: + break + time.sleep(0.5) + + assert "proj2" not in create_query + + # Remove 'proj2' for part all_2_2_0 + break_projection(node, table_name, "proj2", "all_2_2_0", "part") + + node.restart_clickhouse() + + # proj2 is not broken, it doesn't exist, but ok + check(node, table_name, 0, expect_broken_part="proj2", do_check_command=0) + + def test_mutation_with_broken_projection(cluster): node = cluster.instances["node"] diff --git a/tests/integration/test_cgroup_limit/test.py b/tests/integration/test_cgroup_limit/test.py index e77b0f70960..5d56135d9ff 100644 --- a/tests/integration/test_cgroup_limit/test.py +++ b/tests/integration/test_cgroup_limit/test.py @@ -46,7 +46,7 @@ def test_cgroup_cpu_limit(): "clickhouse local -q \"select value from system.settings where name='max_threads'\"", num_cpus, ) - expect_output = (r"\'auto({})\'".format(math.ceil(num_cpus))).encode() + expect_output = (r"auto({})".format(math.ceil(num_cpus))).encode() assert ( result.strip() == expect_output ), f"fail for cpu limit={num_cpus}, result={result.strip()}, expect={expect_output}" diff --git a/tests/integration/test_checking_s3_blobs_paranoid/test.py b/tests/integration/test_checking_s3_blobs_paranoid/test.py index 73f2888ce00..76a0f30f82e 100644 --- a/tests/integration/test_checking_s3_blobs_paranoid/test.py +++ b/tests/integration/test_checking_s3_blobs_paranoid/test.py @@ -708,7 +708,7 @@ def test_no_key_found_disk(cluster, broken_s3): """ SELECT value FROM system.metrics - WHERE metric = 'S3DiskNoKeyErrors' + WHERE metric = 'DiskS3NoSuchKeyErrors' """ ).strip() ) diff --git a/tests/integration/test_cluster_all_replicas/test.py b/tests/integration/test_cluster_all_replicas/test.py index d8bad180e1b..9797db7c498 100644 --- a/tests/integration/test_cluster_all_replicas/test.py +++ b/tests/integration/test_cluster_all_replicas/test.py @@ -21,14 +21,14 @@ def start_cluster(): def test_cluster(start_cluster): assert ( node1.query( - "SELECT hostName() FROM clusterAllReplicas('one_shard_two_nodes', system.one)" + "SELECT hostName() FROM clusterAllReplicas('one_shard_two_nodes', system.one) ORDER BY ALL" ) == "node1\nnode2\n" ) assert set( node1.query( - """SELECT hostName(), * FROM clusterAllReplicas("one_shard_two_nodes", system.one) ORDER BY dummy""" + """SELECT hostName(), * FROM clusterAllReplicas("one_shard_two_nodes", system.one) ORDER BY ALL""" ).splitlines() ) == {"node1\t0", "node2\t0"} @@ -48,7 +48,7 @@ def test_global_in(start_cluster): assert set( node1.query( - """SELECT hostName(), * FROM clusterAllReplicas("one_shard_two_nodes", system.one) where dummy GLOBAL IN u""" + """SELECT hostName(), * FROM clusterAllReplicas("one_shard_two_nodes", system.one) where dummy GLOBAL IN u ORDER BY ALL""" ).splitlines() ) == {"node1\t0", "node2\t0"} @@ -63,7 +63,7 @@ def test_global_in(start_cluster): def test_skip_unavailable_replica(start_cluster, cluster): assert ( node1.query( - f"SELECT hostName() FROM clusterAllReplicas('{cluster}', system.one) settings skip_unavailable_shards=1" + f"SELECT hostName() FROM clusterAllReplicas('{cluster}', system.one) ORDER BY ALL settings skip_unavailable_shards=1" ) == "node1\nnode2\n" ) @@ -81,5 +81,5 @@ def test_error_on_unavailable_replica(start_cluster, cluster): # so when skip_unavailable_shards=0 - any unavailable replica should lead to an error with pytest.raises(QueryRuntimeException): node1.query( - f"SELECT hostName() FROM clusterAllReplicas('{cluster}', system.one) settings skip_unavailable_shards=0" + f"SELECT hostName() FROM clusterAllReplicas('{cluster}', system.one) ORDER BY ALL settings skip_unavailable_shards=0" ) diff --git a/tests/integration/test_delayed_replica_failover/test.py b/tests/integration/test_delayed_replica_failover/test.py index a480ee3f278..f1034e26b25 100644 --- a/tests/integration/test_delayed_replica_failover/test.py +++ b/tests/integration/test_delayed_replica_failover/test.py @@ -20,21 +20,30 @@ node_1_2 = cluster.add_instance("node_1_2", with_zookeeper=True) node_2_1 = cluster.add_instance("node_2_1", with_zookeeper=True) node_2_2 = cluster.add_instance("node_2_2", with_zookeeper=True) +# For test to be runnable multiple times +seqno = 0 + @pytest.fixture(scope="module") def started_cluster(): try: cluster.start() + yield cluster + finally: + cluster.shutdown() + +@pytest.fixture(scope="function", autouse=True) +def create_tables(): + global seqno + try: + seqno += 1 for shard in (1, 2): for replica in (1, 2): node = cluster.instances["node_{}_{}".format(shard, replica)] node.query( - """ -CREATE TABLE replicated (d Date, x UInt32) ENGINE = - ReplicatedMergeTree('/clickhouse/tables/{shard}/replicated', '{instance}') PARTITION BY toYYYYMM(d) ORDER BY d""".format( - shard=shard, instance=node.name - ) + f"CREATE TABLE replicated (d Date, x UInt32) ENGINE = " + f"ReplicatedMergeTree('/clickhouse/tables/{shard}/replicated_{seqno}', '{node.name}') PARTITION BY toYYYYMM(d) ORDER BY d" ) node_1_1.query( @@ -42,10 +51,15 @@ CREATE TABLE replicated (d Date, x UInt32) ENGINE = "Distributed('test_cluster', 'default', 'replicated')" ) - yield cluster + yield finally: - cluster.shutdown() + node_1_1.query("DROP TABLE distributed") + + node_1_1.query("DROP TABLE replicated") + node_1_2.query("DROP TABLE replicated") + node_2_1.query("DROP TABLE replicated") + node_2_2.query("DROP TABLE replicated") def test(started_cluster): @@ -101,7 +115,9 @@ SELECT sum(x) FROM distributed WITH TOTALS SETTINGS # allow pings to zookeeper to timeout (must be greater than ZK session timeout). for _ in range(30): try: - node_2_2.query("SELECT * FROM system.zookeeper where path = '/'") + node_2_2.query( + "SELECT * FROM system.zookeeper where path = '/' SETTINGS insert_keeper_max_retries = 0" + ) time.sleep(0.5) except: break @@ -120,7 +136,7 @@ SELECT sum(x) FROM distributed SETTINGS == "3" ) - # Regression for skip_unavailable_shards in conjunction with skip_unavailable_shards + # Prefer fallback_to_stale_replicas over skip_unavailable_shards assert ( instance_with_dist_table.query( """ diff --git a/tests/integration/test_dictionaries_postgresql/test.py b/tests/integration/test_dictionaries_postgresql/test.py index 516ac27ea26..010ecdb5084 100644 --- a/tests/integration/test_dictionaries_postgresql/test.py +++ b/tests/integration/test_dictionaries_postgresql/test.py @@ -530,10 +530,61 @@ def test_bad_configuration(started_cluster): """ ) - node1.query_and_get_error( + assert "Unexpected key `dbbb`" in node1.query_and_get_error( "SELECT dictGetUInt32(postgres_dict, 'value', toUInt64(1))" ) - assert node1.contains_in_log("Unexpected key `dbbb`") + + +def test_named_collection_from_ddl(started_cluster): + cursor = started_cluster.postgres_conn.cursor() + cursor.execute("DROP TABLE IF EXISTS test_table") + cursor.execute("CREATE TABLE test_table (id integer, value integer)") + + node1.query( + """ + DROP NAMED COLLECTION IF EXISTS pg_conn; + CREATE NAMED COLLECTION pg_conn + AS user = 'postgres', password = 'mysecretpassword', host = 'postgres1', port = 5432, database = 'postgres', table = 'test_table'; + """ + ) + + cursor.execute( + "INSERT INTO test_table SELECT i, i FROM generate_series(0, 99) as t(i)" + ) + + node1.query( + """ + DROP DICTIONARY IF EXISTS postgres_dict; + CREATE DICTIONARY postgres_dict (id UInt32, value UInt32) + PRIMARY KEY id + SOURCE(POSTGRESQL(NAME pg_conn)) + LIFETIME(MIN 1 MAX 2) + LAYOUT(HASHED()); + """ + ) + result = node1.query("SELECT dictGetUInt32(postgres_dict, 'value', toUInt64(99))") + assert int(result.strip()) == 99 + + node1.query( + """ + DROP NAMED COLLECTION IF EXISTS pg_conn_2; + CREATE NAMED COLLECTION pg_conn_2 + AS user = 'postgres', password = 'mysecretpassword', host = 'postgres1', port = 5432, dbbb = 'postgres', table = 'test_table'; + """ + ) + node1.query( + """ + DROP DICTIONARY IF EXISTS postgres_dict; + CREATE DICTIONARY postgres_dict (id UInt32, value UInt32) + PRIMARY KEY id + SOURCE(POSTGRESQL(NAME pg_conn_2)) + LIFETIME(MIN 1 MAX 2) + LAYOUT(HASHED()); + """ + ) + assert "Unexpected key `dbbb`" in node1.query_and_get_error( + "SELECT dictGetUInt32(postgres_dict, 'value', toUInt64(99))" + ) if __name__ == "__main__": diff --git a/tests/integration/test_disk_configuration/test.py b/tests/integration/test_disk_configuration/test.py index afc5303298c..f297c665dc5 100644 --- a/tests/integration/test_disk_configuration/test.py +++ b/tests/integration/test_disk_configuration/test.py @@ -373,7 +373,7 @@ def test_merge_tree_setting_override(start_cluster): CREATE TABLE {TABLE_NAME} (a Int32) ENGINE = MergeTree() ORDER BY tuple() - SETTINGS disk = 'kek', storage_policy = 's3'; + SETTINGS disk = 's3', storage_policy = 's3'; """ ) ) diff --git a/tests/integration/test_distributed_type_object/configs/remote_servers.xml b/tests/integration/test_distributed_type_object/configs/remote_servers.xml index ebce4697529..68b420f36b4 100644 --- a/tests/integration/test_distributed_type_object/configs/remote_servers.xml +++ b/tests/integration/test_distributed_type_object/configs/remote_servers.xml @@ -1,4 +1,4 @@ - + @@ -15,4 +15,4 @@ - + diff --git a/tests/integration/test_distributed_type_object/test.py b/tests/integration/test_distributed_type_object/test.py index e274bd6b774..64acdda887b 100644 --- a/tests/integration/test_distributed_type_object/test.py +++ b/tests/integration/test_distributed_type_object/test.py @@ -16,7 +16,7 @@ def started_cluster(): for node in (node1, node2): node.query( - "CREATE TABLE local_table(id UInt32, data JSON) ENGINE = MergeTree ORDER BY id", + "CREATE TABLE local_table(id UInt32, data Object('json')) ENGINE = MergeTree ORDER BY id", settings={"allow_experimental_object_type": 1}, ) node.query( diff --git a/tests/integration/test_dns_cache/test.py b/tests/integration/test_dns_cache/test.py index a6db26c8575..36401517429 100644 --- a/tests/integration/test_dns_cache/test.py +++ b/tests/integration/test_dns_cache/test.py @@ -317,3 +317,74 @@ def test_host_is_drop_from_cache_after_consecutive_failures( assert node4.wait_for_log_line( "Cached hosts dropped:.*InvalidHostThatDoesNotExist.*" ) + + +node7 = cluster.add_instance( + "node7", + main_configs=["configs/listen_host.xml", "configs/dns_update_long.xml"], + with_zookeeper=True, + ipv6_address="2001:3984:3989::1:1117", + ipv4_address="10.5.95.17", +) + + +def _render_filter_config(allow_ipv4, allow_ipv6): + config = f""" + + {int(allow_ipv4)} + {int(allow_ipv6)} + + """ + return config + + +@pytest.mark.parametrize( + "allow_ipv4, allow_ipv6", + [ + (True, False), + (False, True), + (False, False), + ], +) +def test_dns_resolver_filter(cluster_without_dns_cache_update, allow_ipv4, allow_ipv6): + node = node7 + host_ipv6 = node.ipv6_address + host_ipv4 = node.ipv4_address + + node.set_hosts( + [ + (host_ipv6, "test_host"), + (host_ipv4, "test_host"), + ] + ) + node.replace_config( + "/etc/clickhouse-server/config.d/dns_filter.xml", + _render_filter_config(allow_ipv4, allow_ipv6), + ) + + node.query("SYSTEM RELOAD CONFIG") + node.query("SYSTEM DROP DNS CACHE") + node.query("SYSTEM DROP CONNECTIONS CACHE") + + if not allow_ipv4 and not allow_ipv6: + with pytest.raises(QueryRuntimeException): + node.query("SELECT * FROM remote('lost_host', 'system', 'one')") + else: + node.query("SELECT * FROM remote('test_host', system, one)") + assert ( + node.query( + "SELECT ip_address FROM system.dns_cache WHERE hostname='test_host'" + ) + == f"{host_ipv4 if allow_ipv4 else host_ipv6}\n" + ) + + node.exec_in_container( + [ + "bash", + "-c", + "rm /etc/clickhouse-server/config.d/dns_filter.xml", + ], + privileged=True, + user="root", + ) + node.query("SYSTEM RELOAD CONFIG") diff --git a/tests/integration/test_drop_is_lock_free/test.py b/tests/integration/test_drop_is_lock_free/test.py index 1bb8767a9a0..3855bc21f90 100644 --- a/tests/integration/test_drop_is_lock_free/test.py +++ b/tests/integration/test_drop_is_lock_free/test.py @@ -176,7 +176,7 @@ def test_query_is_permanent(transaction, permanent, exclusive_table): select_handler = node.get_query_request( f""" - SELECT sleepEachRow(3) FROM {exclusive_table} SETTINGS function_sleep_max_microseconds_per_block = 0; + SELECT sleepEachRow(3) FROM {exclusive_table} SETTINGS function_sleep_max_microseconds_per_block = 0, max_threads=1; """, query_id=query_id, ) diff --git a/tests/integration/test_filesystem_cache/test.py b/tests/integration/test_filesystem_cache/test.py index 17a8dd8b6e1..aee8bd25c2e 100644 --- a/tests/integration/test_filesystem_cache/test.py +++ b/tests/integration/test_filesystem_cache/test.py @@ -1,6 +1,7 @@ import logging import time import os +import random import pytest from helpers.cluster import ClickHouseCluster @@ -30,14 +31,6 @@ def cluster(): "config.d/storage_conf_2.xml", ], ) - cluster.add_instance( - "node_no_filesystem_caches_path", - main_configs=[ - "config.d/storage_conf.xml", - "config.d/remove_filesystem_caches_path.xml", - ], - stay_alive=True, - ) cluster.add_instance( "node_force_read_through_cache_on_merge", main_configs=[ @@ -59,6 +52,51 @@ def cluster(): cluster.shutdown() +@pytest.fixture(scope="function") +def non_shared_cluster(): + """ + For tests that cannot run in parallel against the same node/cluster (see test_custom_cached_disk, which relies on + changing server settings at runtime) + """ + try: + # Randomize the cluster name + cluster = ClickHouseCluster(f"{__file__}_non_shared_{random.randint(0, 10**7)}") + cluster.add_instance( + "node_no_filesystem_caches_path", + main_configs=[ + "config.d/storage_conf.xml", + "config.d/remove_filesystem_caches_path.xml", + ], + stay_alive=True, + ) + + logging.info("Starting test-exclusive cluster...") + cluster.start() + logging.info("Cluster started") + + yield cluster + finally: + cluster.shutdown() + + +def wait_for_cache_initialized(node, cache_path, max_attempts=50): + initialized = False + attempts = 0 + while not initialized: + query_result = node.query( + "SELECT path FROM system.filesystem_cache_settings WHERE is_initialized" + ) + initialized = cache_path in query_result + + if initialized: + break + + time.sleep(0.1) + attempts += 1 + if attempts >= max_attempts: + raise "Stopped waiting for cache to be initialized" + + @pytest.mark.parametrize("node_name", ["node"]) def test_parallel_cache_loading_on_startup(cluster, node_name): node = cluster.instances[node_name] @@ -71,14 +109,21 @@ def test_parallel_cache_loading_on_startup(cluster, node_name): ORDER BY value SETTINGS disk = disk( type = cache, - path = 'paralel_loading_test', + name = 'parallel_loading_test', + path = 'parallel_loading_test', disk = 'hdd_blob', max_file_segment_size = '1Ki', boundary_alignment = '1Ki', max_size = '1Gi', max_elements = 10000000, load_metadata_threads = 30); + """ + ) + wait_for_cache_initialized(node, "parallel_loading_test") + + node.query( + """ SYSTEM DROP FILESYSTEM CACHE; INSERT INTO test SELECT * FROM generateRandom('a Int32, b String') LIMIT 1000000; SELECT * FROM test FORMAT Null; @@ -103,6 +148,7 @@ def test_parallel_cache_loading_on_startup(cluster, node_name): ) node.restart_clickhouse() + wait_for_cache_initialized(node, "parallel_loading_test") # < because of additional files loaded into cache on server startup. assert cache_count <= int(node.query("SELECT count() FROM system.filesystem_cache")) @@ -131,7 +177,7 @@ def test_caches_with_the_same_configuration(cluster, node_name): node = cluster.instances[node_name] cache_path = "cache1" - node.query(f"SYSTEM DROP FILESYSTEM CACHE;") + node.query("SYSTEM DROP FILESYSTEM CACHE;") for table in ["test", "test2"]: node.query( f""" @@ -142,14 +188,20 @@ def test_caches_with_the_same_configuration(cluster, node_name): ORDER BY value SETTINGS disk = disk( type = cache, - name = {table}, + name = '{table}', path = '{cache_path}', disk = 'hdd_blob', max_file_segment_size = '1Ki', boundary_alignment = '1Ki', cache_on_write_operations=1, max_size = '1Mi'); + """ + ) + wait_for_cache_initialized(node, cache_path) + + node.query( + f""" SET enable_filesystem_cache_on_write_operations=1; INSERT INTO {table} SELECT * FROM generateRandom('a Int32, b String') LIMIT 1000; @@ -195,9 +247,8 @@ def test_caches_with_the_same_configuration(cluster, node_name): @pytest.mark.parametrize("node_name", ["node_caches_with_same_path"]) def test_caches_with_the_same_configuration_2(cluster, node_name): node = cluster.instances[node_name] - cache_path = "cache1" - node.query(f"SYSTEM DROP FILESYSTEM CACHE;") + node.query("SYSTEM DROP FILESYSTEM CACHE;") for table in ["cache1", "cache2"]: node.query( f""" @@ -207,7 +258,13 @@ def test_caches_with_the_same_configuration_2(cluster, node_name): Engine=MergeTree() ORDER BY value SETTINGS disk = '{table}'; + """ + ) + wait_for_cache_initialized(node, "cache1") + + node.query( + f""" SET enable_filesystem_cache_on_write_operations=1; INSERT INTO {table} SELECT * FROM generateRandom('a Int32, b String') LIMIT 1000; @@ -227,8 +284,8 @@ def test_caches_with_the_same_configuration_2(cluster, node_name): ) -def test_custom_cached_disk(cluster): - node = cluster.instances["node_no_filesystem_caches_path"] +def test_custom_cached_disk(non_shared_cluster): + node = non_shared_cluster.instances["node_no_filesystem_caches_path"] assert "Cannot create cached custom disk without" in node.query_and_get_error( f""" @@ -377,6 +434,7 @@ def test_force_filesystem_cache_on_merges(cluster): ORDER BY value SETTINGS disk = disk( type = cache, + name = 'force_cache_on_merges', path = 'force_cache_on_merges', disk = 'hdd_blob', max_file_segment_size = '1Ki', @@ -385,7 +443,13 @@ def test_force_filesystem_cache_on_merges(cluster): max_size = '10Gi', max_elements = 10000000, load_metadata_threads = 30); + """ + ) + wait_for_cache_initialized(node, "force_cache_on_merges") + + node.query( + """ SYSTEM DROP FILESYSTEM CACHE; INSERT INTO test SELECT * FROM generateRandom('a Int32, b String') LIMIT 1000000; INSERT INTO test SELECT * FROM generateRandom('a Int32, b String') LIMIT 1000000; @@ -441,7 +505,13 @@ SETTINGS disk = disk(type = cache, path = "test_system_sync_filesystem_cache", delayed_cleanup_interval_ms = 10000000, disk = hdd_blob), min_bytes_for_wide_part = 10485760; + """ + ) + wait_for_cache_initialized(node, "test_system_sync_filesystem_cache") + + node.query( + """ INSERT INTO test SELECT 1, 'test'; """ ) @@ -525,7 +595,13 @@ SETTINGS disk = disk(type = cache, keep_free_space_elements_ratio = {elements_ratio}, disk = hdd_blob), min_bytes_for_wide_part = 10485760; + """ + ) + wait_for_cache_initialized(node, "test_keep_up_size_ratio") + + node.query( + """ INSERT INTO test SELECT randomString(200); """ ) diff --git a/tests/integration/test_hedged_requests/test.py b/tests/integration/test_hedged_requests/test.py index 02ecf3c1367..0d72f7c45b1 100644 --- a/tests/integration/test_hedged_requests/test.py +++ b/tests/integration/test_hedged_requests/test.py @@ -333,7 +333,7 @@ def test_receive_timeout2(started_cluster): # in packet receiving but there are replicas in process of # connection establishing. update_configs( - node_1_sleep_in_send_data=4000, + node_1_sleep_in_send_data=5000, node_2_sleep_in_send_tables_status=2000, node_3_sleep_in_send_tables_status=2000, ) diff --git a/tests/integration/test_host_regexp_hosts_file_resolution/configs/host_regexp.xml b/tests/integration/test_host_regexp_hosts_file_resolution/configs/host_regexp.xml index 7a2141e6c7e..9329c8dbde2 100644 --- a/tests/integration/test_host_regexp_hosts_file_resolution/configs/host_regexp.xml +++ b/tests/integration/test_host_regexp_hosts_file_resolution/configs/host_regexp.xml @@ -1,4 +1,4 @@ - + @@ -8,4 +8,4 @@ default - \ No newline at end of file + diff --git a/tests/integration/test_host_regexp_hosts_file_resolution/configs/listen_host.xml b/tests/integration/test_host_regexp_hosts_file_resolution/configs/listen_host.xml index 58ef55cd3f3..9c27c612f63 100644 --- a/tests/integration/test_host_regexp_hosts_file_resolution/configs/listen_host.xml +++ b/tests/integration/test_host_regexp_hosts_file_resolution/configs/listen_host.xml @@ -1,5 +1,5 @@ - + :: 0.0.0.0 1 - + diff --git a/tests/integration/test_incorrect_datetime_format/__init__.py b/tests/integration/test_incorrect_datetime_format/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_incorrect_datetime_format/configs/config.d/cluster.xml b/tests/integration/test_incorrect_datetime_format/configs/config.d/cluster.xml new file mode 100644 index 00000000000..a27968fb3d2 --- /dev/null +++ b/tests/integration/test_incorrect_datetime_format/configs/config.d/cluster.xml @@ -0,0 +1,11 @@ + + + + + + node + + + + + diff --git a/tests/integration/test_incorrect_datetime_format/configs/config.xml b/tests/integration/test_incorrect_datetime_format/configs/config.xml new file mode 100644 index 00000000000..053b5d30418 --- /dev/null +++ b/tests/integration/test_incorrect_datetime_format/configs/config.xml @@ -0,0 +1,9 @@ + + + information + /var/log/clickhouse-server/clickhouse-server.log + /var/log/clickhouse-server/clickhouse-server.err.log + 1000M + 10 + + diff --git a/tests/integration/test_incorrect_datetime_format/test.py b/tests/integration/test_incorrect_datetime_format/test.py new file mode 100644 index 00000000000..3cdc6781534 --- /dev/null +++ b/tests/integration/test_incorrect_datetime_format/test.py @@ -0,0 +1,54 @@ +import logging +import pytest +from helpers.cluster import ClickHouseCluster + + +@pytest.fixture(scope="module") +def cluster(): + try: + cluster = ClickHouseCluster(__file__) + cluster.add_instance( + "node", + main_configs=[ + "configs/config.d/cluster.xml", + ], + ) + logging.info("Starting cluster...") + cluster.start() + logging.info("Cluster started") + + node = cluster.instances["node"] + node.query( + """ + CREATE TABLE tab + ( + a DateTime, + pk String + ) Engine = MergeTree() ORDER BY pk; + """ + ) + + yield cluster + finally: + cluster.shutdown() + + +def test_incorrect_datetime_format(cluster): + """ + Test for an MSan issue which is caused by parsing incorrect datetime string + """ + + node = cluster.instances["node"] + + res = node.query("SELECT count(*) FROM tab WHERE a = '2024-08-06 09:58:09'").strip() + assert res == "0" + + error = node.query_and_get_error( + "SELECT count(*) FROM tab WHERE a = '2024-08-06 09:58:0'" + ).strip() + assert "Cannot parse time component of DateTime 09:58:0" in error + + error = node.query_and_get_error( + "SELECT count(*) FROM tab WHERE a = '2024-08-0 09:58:09'" + ).strip() + assert "Cannot convert string '2024-08-0 09:58:09' to type DateTime" in error diff --git a/tests/integration/test_jbod_ha/configs/config.d/storage_configuration.xml b/tests/integration/test_jbod_ha/configs/config.d/storage_configuration.xml index b5c351d105b..fb9acc58ad6 100644 --- a/tests/integration/test_jbod_ha/configs/config.d/storage_configuration.xml +++ b/tests/integration/test_jbod_ha/configs/config.d/storage_configuration.xml @@ -1,4 +1,4 @@ - + 1000 @@ -27,4 +27,4 @@ - + diff --git a/tests/integration/test_manipulate_statistics/test.py b/tests/integration/test_manipulate_statistics/test.py index 2541c9b946f..3a1c5ad5b96 100644 --- a/tests/integration/test_manipulate_statistics/test.py +++ b/tests/integration/test_manipulate_statistics/test.py @@ -6,11 +6,17 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( - "node1", user_configs=["config/config.xml"], with_zookeeper=True + "node1", + user_configs=["config/config.xml"], + with_zookeeper=True, + macros={"replica": "a", "shard": "shard1"}, ) node2 = cluster.add_instance( - "node2", user_configs=["config/config.xml"], with_zookeeper=True + "node2", + user_configs=["config/config.xml"], + with_zookeeper=True, + macros={"replica": "b", "shard": "shard1"}, ) @@ -129,8 +135,8 @@ def test_single_node_normal(started_cluster): def test_replicated_table_ddl(started_cluster): - node1.query("DROP TABLE IF EXISTS test_stat") - node2.query("DROP TABLE IF EXISTS test_stat") + node1.query("DROP TABLE IF EXISTS test_stat SYNC") + node2.query("DROP TABLE IF EXISTS test_stat SYNC") node1.query( """ @@ -183,3 +189,19 @@ def test_replicated_table_ddl(started_cluster): ) check_stat_file_on_disk(node2, "test_stat", "all_0_0_0_3", "a", True) check_stat_file_on_disk(node2, "test_stat", "all_0_0_0_3", "b", True) + + +def test_replicated_db(started_cluster): + node1.query("DROP DATABASE IF EXISTS test SYNC") + node2.query("DROP DATABASE IF EXISTS test SYNC") + node1.query( + "CREATE DATABASE test ENGINE = Replicated('/test/shared_stats', '{shard}', '{replica}')" + ) + node2.query( + "CREATE DATABASE test ENGINE = Replicated('/test/shared_stats', '{shard}', '{replica}')" + ) + node1.query( + "CREATE TABLE test.test_stats (a Int64, b Int64) ENGINE = ReplicatedMergeTree() ORDER BY()" + ) + node2.query("ALTER TABLE test.test_stats MODIFY COLUMN b Float64") + node2.query("ALTER TABLE test.test_stats MODIFY STATISTICS b TYPE tdigest") diff --git a/tests/integration/test_mask_sensitive_info/test.py b/tests/integration/test_mask_sensitive_info/test.py index 6f6dc4d287f..8d5345082ff 100644 --- a/tests/integration/test_mask_sensitive_info/test.py +++ b/tests/integration/test_mask_sensitive_info/test.py @@ -202,6 +202,10 @@ def test_create_table(): f"S3Queue('http://minio1:9001/root/data/', 'CSV', 'gzip') settings mode = 'ordered'", f"S3Queue('http://minio1:9001/root/data/', 'minio', '{password}', 'CSV') settings mode = 'ordered'", f"S3Queue('http://minio1:9001/root/data/', 'minio', '{password}', 'CSV', 'gzip') settings mode = 'ordered'", + ( + f"Iceberg('http://minio1:9001/root/data/test11.csv.gz', 'minio', '{password}')", + "DNS_ERROR", + ), ] def make_test_case(i): @@ -266,6 +270,7 @@ def test_create_table(): # due to sensitive data substituion the query will be normalized, so not "settings" but "SETTINGS" "CREATE TABLE table19 (`x` int) ENGINE = S3Queue('http://minio1:9001/root/data/', 'minio', '[HIDDEN]', 'CSV') SETTINGS mode = 'ordered'", "CREATE TABLE table20 (`x` int) ENGINE = S3Queue('http://minio1:9001/root/data/', 'minio', '[HIDDEN]', 'CSV', 'gzip') SETTINGS mode = 'ordered'", + "CREATE TABLE table21 (`x` int) ENGINE = Iceberg('http://minio1:9001/root/data/test11.csv.gz', 'minio', '[HIDDEN]')", ], must_not_contain=[password], ) @@ -387,6 +392,7 @@ def test_table_functions(): f"azureBlobStorageCluster('test_shard_localhost', '{azure_storage_account_url}', 'cont', 'test_simple_15.csv', '{azure_account_name}', '{azure_account_key}', 'CSV', 'none', 'auto')", f"azureBlobStorageCluster('test_shard_localhost', named_collection_2, connection_string = '{azure_conn_string}', container = 'cont', blob_path = 'test_simple_16.csv', format = 'CSV')", f"azureBlobStorageCluster('test_shard_localhost', named_collection_2, storage_account_url = '{azure_storage_account_url}', container = 'cont', blob_path = 'test_simple_17.csv', account_name = '{azure_account_name}', account_key = '{azure_account_key}')", + f"iceberg('http://minio1:9001/root/data/test11.csv.gz', 'minio', '{password}')", ] def make_test_case(i): @@ -478,6 +484,7 @@ def test_table_functions(): f"CREATE TABLE tablefunc48 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', '{azure_storage_account_url}', 'cont', 'test_simple_15.csv', '{azure_account_name}', '[HIDDEN]', 'CSV', 'none', 'auto')", f"CREATE TABLE tablefunc49 (x int) AS azureBlobStorageCluster('test_shard_localhost', named_collection_2, connection_string = '{azure_conn_string}', container = 'cont', blob_path = 'test_simple_16.csv', format = 'CSV')", f"CREATE TABLE tablefunc50 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', named_collection_2, storage_account_url = '{azure_storage_account_url}', container = 'cont', blob_path = 'test_simple_17.csv', account_name = '{azure_account_name}', account_key = '[HIDDEN]')", + "CREATE TABLE tablefunc51 (`x` int) AS iceberg('http://minio1:9001/root/data/test11.csv.gz', 'minio', '[HIDDEN]')", ], must_not_contain=[password], ) diff --git a/tests/integration/test_named_collections_encrypted/__init__.py b/tests/integration/test_named_collections_encrypted/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_named_collections_encrypted/configs/config.d/named_collections_encrypted.xml b/tests/integration/test_named_collections_encrypted/configs/config.d/named_collections_encrypted.xml new file mode 100644 index 00000000000..233e23846cb --- /dev/null +++ b/tests/integration/test_named_collections_encrypted/configs/config.d/named_collections_encrypted.xml @@ -0,0 +1,12 @@ + + + local_encrypted + bebec0cabebec0cabebec0cabebec0ca + + + + + value1 + + + diff --git a/tests/integration/test_named_collections_encrypted/configs/config.d/named_collections_with_zookeeper_encrypted.xml b/tests/integration/test_named_collections_encrypted/configs/config.d/named_collections_with_zookeeper_encrypted.xml new file mode 100644 index 00000000000..d1dd5c29787 --- /dev/null +++ b/tests/integration/test_named_collections_encrypted/configs/config.d/named_collections_with_zookeeper_encrypted.xml @@ -0,0 +1,31 @@ + + + zookeeper_encrypted + bebec0cabebec0cabebec0cabebec0ca + /named_collections_path/ + 5000 + + + + + value1 + + + + + + + true + + node_with_keeper + 9000 + + + node_with_keeper_2 + 9000 + + + true + + + diff --git a/tests/integration/test_named_collections_encrypted/configs/users.d/users.xml b/tests/integration/test_named_collections_encrypted/configs/users.d/users.xml new file mode 100644 index 00000000000..7d4f0543ff1 --- /dev/null +++ b/tests/integration/test_named_collections_encrypted/configs/users.d/users.xml @@ -0,0 +1,17 @@ + + + + 0 + + + + + + default + default + 1 + 1 + 1 + + + diff --git a/tests/integration/test_named_collections_encrypted/test.py b/tests/integration/test_named_collections_encrypted/test.py new file mode 100644 index 00000000000..7dff32fa6c9 --- /dev/null +++ b/tests/integration/test_named_collections_encrypted/test.py @@ -0,0 +1,123 @@ +import logging +import pytest +import os +from helpers.cluster import ClickHouseCluster + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +NAMED_COLLECTIONS_CONFIG = os.path.join( + SCRIPT_DIR, "./configs/config.d/named_collections.xml" +) + +ZK_PATH = "/named_collections_path" + + +@pytest.fixture(scope="module") +def cluster(): + try: + cluster = ClickHouseCluster(__file__) + cluster.add_instance( + "node_encrypted", + main_configs=[ + "configs/config.d/named_collections_encrypted.xml", + ], + user_configs=[ + "configs/users.d/users.xml", + ], + stay_alive=True, + ) + cluster.add_instance( + "node_with_keeper_encrypted", + main_configs=[ + "configs/config.d/named_collections_with_zookeeper_encrypted.xml", + ], + user_configs=[ + "configs/users.d/users.xml", + ], + stay_alive=True, + with_zookeeper=True, + ) + cluster.add_instance( + "node_with_keeper_2_encrypted", + main_configs=[ + "configs/config.d/named_collections_with_zookeeper_encrypted.xml", + ], + user_configs=[ + "configs/users.d/users.xml", + ], + stay_alive=True, + with_zookeeper=True, + ) + + logging.info("Starting cluster...") + cluster.start() + logging.info("Cluster started") + + yield cluster + finally: + cluster.shutdown() + + +def check_encrypted_content(node, zk=None): + assert ( + "collection1\ncollection2" + == node.query("select name from system.named_collections").strip() + ) + + assert ( + "['key1','key2']" + == node.query( + "select mapKeys(collection) from system.named_collections where name = 'collection2'" + ).strip() + ) + + assert ( + "1234\tvalue2" + == node.query( + "select collection['key1'], collection['key2'] from system.named_collections where name = 'collection2'" + ).strip() + ) + + # Check that the underlying storage is encrypted + content = ( + zk.get(ZK_PATH + "/collection2.sql")[0] + if zk is not None + else open( + f"{node.path}/database/named_collections/collection2.sql", "rb" + ).read() + ) + + assert ( + content[0:3] == b"ENC" + ) # file signature (aka magic number) of the encrypted file + assert b"key1" not in content + assert b"1234" not in content + assert b"key2" not in content + assert b"value2" not in content + + +def test_local_storage_encrypted(cluster): + node = cluster.instances["node_encrypted"] + node.query("CREATE NAMED COLLECTION collection2 AS key1=1234, key2='value2'") + + check_encrypted_content(node) + node.restart_clickhouse() + check_encrypted_content(node) + + node.query("DROP NAMED COLLECTION collection2") + + +def test_zookeper_storage_encrypted(cluster): + node1 = cluster.instances["node_with_keeper_encrypted"] + node2 = cluster.instances["node_with_keeper_2_encrypted"] + zk = cluster.get_kazoo_client("zoo1") + + node1.query("CREATE NAMED COLLECTION collection2 AS key1=1234, key2='value2'") + + check_encrypted_content(node1, zk) + check_encrypted_content(node2, zk) + node1.restart_clickhouse() + node2.restart_clickhouse() + check_encrypted_content(node1, zk) + check_encrypted_content(node2, zk) + + node1.query("DROP NAMED COLLECTION collection2") diff --git a/tests/integration/test_odbc_interaction/test.py b/tests/integration/test_odbc_interaction/test.py index 0d0d7a0afb1..9d4ca5ad49f 100644 --- a/tests/integration/test_odbc_interaction/test.py +++ b/tests/integration/test_odbc_interaction/test.py @@ -51,9 +51,9 @@ create_table_sql_nullable_template = """ """ -def skip_test_msan(instance): - if instance.is_built_with_memory_sanitizer(): - pytest.skip("Memory Sanitizer cannot work with third-party shared libraries") +def skip_test_sanitizers(instance): + if instance.is_built_with_sanitizer(): + pytest.skip("Sanitizers cannot work with third-party shared libraries") def get_mysql_conn(): @@ -208,7 +208,7 @@ def started_cluster(): def test_mysql_odbc_select_nullable(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) mysql_setup = node1.odbc_drivers["MySQL"] table_name = "test_insert_nullable_select" @@ -248,7 +248,7 @@ def test_mysql_odbc_select_nullable(started_cluster): def test_mysql_simple_select_works(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) mysql_setup = node1.odbc_drivers["MySQL"] @@ -331,7 +331,7 @@ CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32, column_x Nulla def test_mysql_insert(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) mysql_setup = node1.odbc_drivers["MySQL"] table_name = "test_insert" @@ -374,7 +374,7 @@ def test_mysql_insert(started_cluster): def test_sqlite_simple_select_function_works(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) sqlite_setup = node1.odbc_drivers["SQLite3"] sqlite_db = sqlite_setup["Database"] @@ -438,7 +438,7 @@ def test_sqlite_simple_select_function_works(started_cluster): def test_sqlite_table_function(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) sqlite_setup = node1.odbc_drivers["SQLite3"] sqlite_db = sqlite_setup["Database"] @@ -470,7 +470,7 @@ def test_sqlite_table_function(started_cluster): def test_sqlite_simple_select_storage_works(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) sqlite_setup = node1.odbc_drivers["SQLite3"] sqlite_db = sqlite_setup["Database"] @@ -503,7 +503,7 @@ def test_sqlite_simple_select_storage_works(started_cluster): def test_sqlite_odbc_hashed_dictionary(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) sqlite_db = node1.odbc_drivers["SQLite3"]["Database"] node1.exec_in_container( @@ -586,7 +586,7 @@ def test_sqlite_odbc_hashed_dictionary(started_cluster): def test_sqlite_odbc_cached_dictionary(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) sqlite_db = node1.odbc_drivers["SQLite3"]["Database"] node1.exec_in_container( @@ -635,7 +635,7 @@ def test_sqlite_odbc_cached_dictionary(started_cluster): def test_postgres_odbc_hashed_dictionary_with_schema(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) try: conn = get_postgres_conn(started_cluster) @@ -663,7 +663,7 @@ def test_postgres_odbc_hashed_dictionary_with_schema(started_cluster): def test_postgres_odbc_hashed_dictionary_no_tty_pipe_overflow(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) try: conn = get_postgres_conn(started_cluster) @@ -685,7 +685,7 @@ def test_postgres_odbc_hashed_dictionary_no_tty_pipe_overflow(started_cluster): def test_no_connection_pooling(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) try: conn = get_postgres_conn(started_cluster) @@ -717,7 +717,7 @@ def test_no_connection_pooling(started_cluster): def test_postgres_insert(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) conn = get_postgres_conn(started_cluster) @@ -754,7 +754,7 @@ def test_postgres_insert(started_cluster): def test_odbc_postgres_date_data_type(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) try: conn = get_postgres_conn(started_cluster) @@ -783,7 +783,7 @@ def test_odbc_postgres_date_data_type(started_cluster): def test_odbc_postgres_conversions(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) try: conn = get_postgres_conn(started_cluster) @@ -841,7 +841,7 @@ def test_odbc_postgres_conversions(started_cluster): def test_odbc_cyrillic_with_varchar(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) conn = get_postgres_conn(started_cluster) cursor = conn.cursor() @@ -868,7 +868,7 @@ def test_odbc_cyrillic_with_varchar(started_cluster): def test_many_connections(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) conn = get_postgres_conn(started_cluster) cursor = conn.cursor() @@ -894,7 +894,7 @@ def test_many_connections(started_cluster): def test_concurrent_queries(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) conn = get_postgres_conn(started_cluster) cursor = conn.cursor() @@ -948,7 +948,7 @@ def test_concurrent_queries(started_cluster): def test_odbc_long_column_names(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) conn = get_postgres_conn(started_cluster) cursor = conn.cursor() @@ -986,7 +986,7 @@ def test_odbc_long_column_names(started_cluster): def test_odbc_long_text(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) conn = get_postgres_conn(started_cluster) cursor = conn.cursor() diff --git a/tests/integration/test_postgresql_replica_database_engine_2/test.py b/tests/integration/test_postgresql_replica_database_engine_2/test.py index 406b50bc486..7fdd17625a9 100644 --- a/tests/integration/test_postgresql_replica_database_engine_2/test.py +++ b/tests/integration/test_postgresql_replica_database_engine_2/test.py @@ -953,12 +953,14 @@ def test_generated_columns(started_cluster): "", f"""CREATE TABLE {table} ( key integer PRIMARY KEY, - x integer, + x integer DEFAULT 0, + temp integer DEFAULT 0, y integer GENERATED ALWAYS AS (x*2) STORED, - z text); + z text DEFAULT 'z'); """, ) + pg_manager.execute(f"alter table {table} drop column temp;") pg_manager.execute(f"insert into {table} (key, x, z) values (1,1,'1');") pg_manager.execute(f"insert into {table} (key, x, z) values (2,2,'2');") @@ -991,6 +993,44 @@ def test_generated_columns(started_cluster): ) +def test_generated_columns_with_sequence(started_cluster): + table = "test_generated_columns_with_sequence" + + pg_manager.create_postgres_table( + table, + "", + f"""CREATE TABLE {table} ( + key integer PRIMARY KEY, + x integer, + y integer GENERATED ALWAYS AS (x*2) STORED, + z text); + """, + ) + + pg_manager.execute( + f"create sequence {table}_id_seq increment by 1 minvalue 1 start 1;" + ) + pg_manager.execute( + f"alter table {table} alter key set default nextval('{table}_id_seq');" + ) + pg_manager.execute(f"insert into {table} (key, x, z) values (1,1,'1');") + pg_manager.execute(f"insert into {table} (key, x, z) values (2,2,'2');") + + pg_manager.create_materialized_db( + ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + settings=[ + f"materialized_postgresql_tables_list = '{table}'", + "materialized_postgresql_backoff_min_ms = 100", + "materialized_postgresql_backoff_max_ms = 100", + ], + ) + + check_tables_are_synchronized( + instance, table, postgres_database=pg_manager.get_default_database() + ) + + def test_default_columns(started_cluster): table = "test_default_columns" @@ -1087,9 +1127,13 @@ def test_dependent_loading(started_cluster): nested_time = instance.query( f"SELECT event_time_microseconds FROM system.text_log WHERE message like 'Loading table default.{uuid}_nested' and message not like '%like%'" ).strip() - time = instance.query( - f"SELECT event_time_microseconds FROM system.text_log WHERE message like 'Loading table default.{table}' and message not like '%like%'" - ).strip() + time = ( + instance.query( + f"SELECT event_time_microseconds FROM system.text_log WHERE message like 'Loading table default.{table}' and message not like '%like%'" + ) + .strip() + .split("\n")[-1] + ) instance.query( f"SELECT toDateTime64('{nested_time}', 6) < toDateTime64('{time}', 6)" ) diff --git a/tests/integration/test_prometheus_protocols/__init__.py b/tests/integration/test_prometheus_protocols/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_prometheus_protocols/configs/allow_experimental_time_series_table.xml b/tests/integration/test_prometheus_protocols/configs/allow_experimental_time_series_table.xml new file mode 100644 index 00000000000..d71cfcaf2c8 --- /dev/null +++ b/tests/integration/test_prometheus_protocols/configs/allow_experimental_time_series_table.xml @@ -0,0 +1,7 @@ + + + + 1 + + + diff --git a/tests/integration/test_prometheus_protocols/configs/prometheus.xml b/tests/integration/test_prometheus_protocols/configs/prometheus.xml new file mode 100644 index 00000000000..071a29620cf --- /dev/null +++ b/tests/integration/test_prometheus_protocols/configs/prometheus.xml @@ -0,0 +1,21 @@ + + + 9092 + + + /write + + remote_write +
{test_result.status}{test_result.time}{test_result.time}
default.prometheus
+ + + + /read + + remote_read + default.prometheus
+
+
+ + + diff --git a/tests/integration/test_prometheus_protocols/test.py b/tests/integration/test_prometheus_protocols/test.py new file mode 100644 index 00000000000..0c75a8194c7 --- /dev/null +++ b/tests/integration/test_prometheus_protocols/test.py @@ -0,0 +1,177 @@ +import pytest +import time +import requests +from http import HTTPStatus +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +node = cluster.add_instance( + "node", + main_configs=["configs/prometheus.xml"], + user_configs=["configs/allow_experimental_time_series_table.xml"], + with_prometheus=True, + handle_prometheus_remote_write=True, + handle_prometheus_remote_read=True, +) + + +def execute_query_on_prometheus_writer(query, timestamp): + return execute_query_impl( + cluster.get_instance_ip(cluster.prometheus_writer_host), + cluster.prometheus_writer_port, + "/api/v1/query", + query, + timestamp, + ) + + +def execute_query_on_prometheus_reader(query, timestamp): + return execute_query_impl( + cluster.get_instance_ip(cluster.prometheus_reader_host), + cluster.prometheus_reader_port, + "/api/v1/query", + query, + timestamp, + ) + + +def execute_query_impl(host, port, path, query, timestamp): + if not path.startswith("/"): + path += "/" + url = f"http://{host}:{port}/{path.strip('/')}?query={query}&time={timestamp}" + print(f"Requesting {url}") + r = requests.get(url) + print(f"Status code: {r.status_code} {HTTPStatus(r.status_code).phrase}") + if r.status_code != requests.codes.ok: + print(f"Response: {r.text}") + raise Exception(f"Got unexpected status code {r.status_code}") + return r.json() + + +def show_query_result(query): + evaluation_time = time.time() + print(f"Evaluating query: {query}") + print(f"Evaluation time: {evaluation_time}") + result_from_writer = execute_query_on_prometheus_writer(query, evaluation_time) + print(f"Result from prometheus_writer: {result_from_writer}") + result_from_reader = execute_query_on_prometheus_reader(query, evaluation_time) + print(f"Result from prometheus_reader: {result_from_reader}") + + +def compare_query(query): + timeout = 60 + start_time = time.time() + evaluation_time = start_time + print(f"Evaluating query: {query}") + print(f"Evaluation time: {evaluation_time}") + while time.time() < start_time + timeout: + result_from_writer = execute_query_on_prometheus_writer(query, evaluation_time) + time.sleep(1) + result_from_reader = execute_query_on_prometheus_reader(query, evaluation_time) + print(f"Result from prometheus_writer: {result_from_writer}") + print(f"Result from prometheus_reader: {result_from_reader}") + if result_from_writer == result_from_reader: + return + raise Exception( + f"Got different results from prometheus_writer and prometheus_reader" + ) + + +def compare_queries(): + compare_query("up") + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +@pytest.fixture(autouse=True) +def cleanup_after_test(): + try: + yield + finally: + node.query("DROP TABLE IF EXISTS prometheus SYNC") + node.query("DROP TABLE IF EXISTS original SYNC") + node.query("DROP TABLE IF EXISTS mydata SYNC") + node.query("DROP TABLE IF EXISTS mytable SYNC") + node.query("DROP TABLE IF EXISTS mymetrics SYNC") + + +def test_default(): + node.query("CREATE TABLE prometheus ENGINE=TimeSeries") + compare_queries() + + +def test_tags_to_columns(): + node.query( + "CREATE TABLE prometheus ENGINE=TimeSeries SETTINGS tags_to_columns = {'job': 'job', 'instance': 'instance'}" + ) + compare_queries() + + +def test_64bit_id(): + node.query("CREATE TABLE prometheus (id UInt64) ENGINE=TimeSeries") + compare_queries() + + +def test_custom_id_algorithm(): + node.query( + "CREATE TABLE prometheus (id FixedString(16) DEFAULT murmurHash3_128(metric_name, all_tags)) ENGINE=TimeSeries" + ) + compare_queries() + + +def test_create_as_table(): + node.query("CREATE TABLE original ENGINE=TimeSeries") + node.query("CREATE TABLE prometheus AS original") + compare_queries() + + +def test_inner_engines(): + node.query( + "CREATE TABLE prometheus ENGINE=TimeSeries " + "DATA ENGINE=MergeTree ORDER BY (id, timestamp) " + "TAGS ENGINE=AggregatingMergeTree ORDER BY (metric_name, id) " + "METRICS ENGINE=ReplacingMergeTree ORDER BY metric_family_name" + ) + compare_queries() + + +def test_external_tables(): + node.query("DROP TABLE IF EXISTS mydata") + node.query("DROP TABLE IF EXISTS mytags") + node.query("DROP TABLE IF EXISTS mymetrics") + node.query("DROP TABLE IF EXISTS prometheus") + + node.query( + "CREATE TABLE mydata (id UUID, timestamp DateTime64(3), value Float64) " + "ENGINE=MergeTree ORDER BY (id, timestamp)" + ) + node.query( + "CREATE TABLE mytags (" + "id UUID, " + "metric_name LowCardinality(String), " + "tags Map(LowCardinality(String), String), " + "min_time SimpleAggregateFunction(min, Nullable(DateTime64(3))), " + "max_time SimpleAggregateFunction(max, Nullable(DateTime64(3)))) " + "ENGINE=AggregatingMergeTree ORDER BY (metric_name, id)" + ) + + # FIXME: The table structure should be: + # "CREATE TABLE mymetrics (metric_family_name String, type LowCardinality(String), unit LowCardinality(String), help String)" + # Renamed it because of the bug and potential type mismatch. + node.query( + "CREATE TABLE mymetrics (metric_family_name String, type String, unit String, help String) " + "ENGINE=ReplacingMergeTree ORDER BY metric_family_name" + ) + node.query( + "CREATE TABLE prometheus ENGINE=TimeSeries " + "DATA mydata TAGS mytags METRICS mymetrics" + ) + compare_queries() diff --git a/tests/integration/test_recovery_time_metric/__init__.py b/tests/integration/test_recovery_time_metric/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_recovery_time_metric/configs/config.xml b/tests/integration/test_recovery_time_metric/configs/config.xml new file mode 100644 index 00000000000..bad9b1fa9ea --- /dev/null +++ b/tests/integration/test_recovery_time_metric/configs/config.xml @@ -0,0 +1,41 @@ + + 9000 + + + + + + + + + default + + + + + + 2181 + 1 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + 20000 + + + + 1 + localhost + 9444 + + + + + + + localhost + 2181 + + 20000 + + + diff --git a/tests/integration/test_recovery_time_metric/test.py b/tests/integration/test_recovery_time_metric/test.py new file mode 100644 index 00000000000..6fcf2fad423 --- /dev/null +++ b/tests/integration/test_recovery_time_metric/test.py @@ -0,0 +1,61 @@ +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance( + "node", + main_configs=["configs/config.xml"], + stay_alive=True, +) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_recovery_time_metric(start_cluster): + node.query( + """ + DROP DATABASE IF EXISTS rdb; + CREATE DATABASE rdb + ENGINE = Replicated('/test/test_recovery_time_metric', 'shard1', 'replica1') + """ + ) + + node.query( + """ + DROP TABLE IF EXISTS rdb.t; + CREATE TABLE rdb.t + ( + `x` UInt32 + ) + ENGINE = MergeTree + ORDER BY x + """ + ) + + node.exec_in_container(["bash", "-c", "rm /var/lib/clickhouse/metadata/rdb/t.sql"]) + + node.restart_clickhouse() + + ret = int( + node.query( + """ + SELECT recovery_time + FROM system.clusters + WHERE cluster = 'rdb' + """ + ).strip() + ) + assert ret > 0 + + node.query( + """ + DROP DATABASE rdb + """ + ) diff --git a/tests/integration/test_role/test.py b/tests/integration/test_role/test.py index b3b18dc8271..225cab975ff 100644 --- a/tests/integration/test_role/test.py +++ b/tests/integration/test_role/test.py @@ -1,5 +1,6 @@ import time import pytest +import random from helpers.client import QueryRuntimeException from helpers.cluster import ClickHouseCluster from helpers.test_tools import TSV @@ -418,72 +419,215 @@ def test_function_current_roles(): ) -def test_role_expiration(): - instance.query("CREATE USER ure") +@pytest.mark.parametrize("with_extra_role", [False, True]) +def test_role_expiration(with_extra_role): instance.query("CREATE ROLE rre") - instance.query("GRANT rre TO ure") + instance.query("CREATE USER ure DEFAULT ROLE rre") - instance.query("CREATE TABLE IF NOT EXISTS tre (id Int) Engine=Log") - instance.query("INSERT INTO tre VALUES (0)") + instance.query("CREATE TABLE table1 (id Int) Engine=Log") + instance.query("CREATE TABLE table2 (id Int) Engine=Log") + instance.query("INSERT INTO table1 VALUES (1)") + instance.query("INSERT INTO table2 VALUES (2)") + instance.query("GRANT SELECT ON table1 TO rre") + + assert instance.query("SELECT * FROM table1", user="ure") == "1\n" assert "Not enough privileges" in instance.query_and_get_error( - "SELECT * FROM tre", user="ure" + "SELECT * FROM table2", user="ure" ) - instance.query("GRANT SELECT ON tre TO rre") - - assert instance.query("SELECT * FROM tre", user="ure") == "0\n" - # access_control_improvements/role_cache_expiration_time_seconds value is 2 for the test # so we wait >2 seconds until the role is expired time.sleep(5) - instance.query("CREATE TABLE IF NOT EXISTS tre1 (id Int) Engine=Log") - instance.query("INSERT INTO tre1 VALUES (0)") - instance.query("GRANT SELECT ON tre1 TO rre") + if with_extra_role: + # Expiration of role "rre" from the role cache can be caused by another role being used. + instance.query("CREATE ROLE extra_role") + instance.query("CREATE USER extra_user DEFAULT ROLE extra_role") + instance.query("GRANT SELECT ON table1 TO extra_role") + assert instance.query("SELECT * FROM table1", user="extra_user") == "1\n" - assert instance.query("SELECT * from tre1", user="ure") == "0\n" + instance.query("GRANT SELECT ON table2 TO rre") + assert instance.query("SELECT * FROM table1", user="ure") == "1\n" + assert instance.query("SELECT * FROM table2", user="ure") == "2\n" - instance.query("DROP USER ure") instance.query("DROP ROLE rre") - instance.query("DROP TABLE tre") - instance.query("DROP TABLE tre1") + instance.query("DROP USER ure") + instance.query("DROP TABLE table1") + instance.query("DROP TABLE table2") + + if with_extra_role: + instance.query("DROP ROLE extra_role") + instance.query("DROP USER extra_user") -def test_two_roles_expiration(): - instance.query("CREATE USER ure") - instance.query("CREATE ROLE rre") - instance.query("GRANT rre TO ure") +def test_roles_cache(): + # This test takes 20 seconds. + test_time = 20 - instance.query("CREATE ROLE rre_second") - - instance.query("CREATE TABLE IF NOT EXISTS tre (id Int) Engine=Log") - instance.query("INSERT INTO tre VALUES (0)") - - assert "Not enough privileges" in instance.query_and_get_error( - "SELECT * FROM tre", user="ure" - ) - - instance.query("GRANT SELECT ON tre TO rre") - - assert instance.query("SELECT * FROM tre", user="ure") == "0\n" - - # access_control_improvements/role_cache_expiration_time_seconds value is 2 for the test - # so we wait >2 seconds until the roles are expired - time.sleep(5) + # Three users A, B, C. + users = ["A", "B", "C"] + instance.query("CREATE USER " + ", ".join(users)) + # Table "tbl" has 10 columns. Each of the users has access to a different set of columns. + num_columns = 10 + columns = [f"x{i}" for i in range(1, num_columns + 1)] + columns_with_types = [column + " Int64" for column in columns] + columns_with_types_comma_separated = ", ".join(columns_with_types) + values = list(range(1, num_columns + 1)) + values_comma_separated = ", ".join([str(value) for value in values]) instance.query( - "GRANT SELECT ON tre1 TO rre_second" - ) # we expect that both rre and rre_second are gone from cache upon this operation + f"CREATE TABLE tbl ({columns_with_types_comma_separated}) ENGINE=MergeTree ORDER BY tuple()" + ) + instance.query(f"INSERT INTO tbl VALUES ({values_comma_separated})") + columns_to_values = dict([(f"x{i}", i) for i in range(1, num_columns + 1)]) - instance.query("CREATE TABLE IF NOT EXISTS tre1 (id Int) Engine=Log") - instance.query("INSERT INTO tre1 VALUES (0)") - instance.query("GRANT SELECT ON tre1 TO rre") + # In this test we create and modify roles multiple times along with updating the following variables. + # Then we check that each of the users has access to the expected set of columns. + roles = [] + users_to_roles = dict([(user, []) for user in users]) + roles_to_columns = {} - assert instance.query("SELECT * from tre1", user="ure") == "0\n" + # Checks that each of the users can access the expected set of columns and can't access other columns. + def check(): + for user in random.sample(users, len(users)): + expected_roles = users_to_roles[user] + expected_columns = list( + set(sum([roles_to_columns[role] for role in expected_roles], [])) + ) + expected_result = sorted( + [columns_to_values[column] for column in expected_columns] + ) + query = " UNION ALL ".join( + [ + f"SELECT * FROM viewIfPermitted(SELECT {column} AS c FROM tbl ELSE null('c Int64'))" + for column in columns + ] + ) + result = instance.query(query, user=user).splitlines() + result = sorted([int(value) for value in result]) + ok = result == expected_result + if not ok: + print(f"Show grants for {user}:") + print( + instance.query( + "SHOW GRANTS FOR " + ", ".join([user] + expected_roles) + ) + ) + print(f"Expected result: {expected_result}") + print(f"Got unexpected result: {result}") + assert ok - instance.query("DROP USER ure") - instance.query("DROP ROLE rre") - instance.query("DROP ROLE rre_second") - instance.query("DROP TABLE tre") - instance.query("DROP TABLE tre1") + # Grants one of our roles a permission to access one of the columns. + def grant_column(): + columns_used_in_roles = sum(roles_to_columns.values(), []) + columns_to_choose = [ + column for column in columns if column not in columns_used_in_roles + ] + if not columns_to_choose or not roles: + return False + column = random.choice(columns_to_choose) + role = random.choice(roles) + instance.query(f"GRANT SELECT({column}) ON tbl TO {role}") + roles_to_columns[role].append(column) + return True + + # Revokes a permission to access one of the granted column from all our roles. + def revoke_column(): + columns_used_in_roles = sum(roles_to_columns.values(), []) + columns_to_choose = list(set(columns_used_in_roles)) + if not columns_to_choose or not roles: + return False + column = random.choice(columns_to_choose) + roles_str = ", ".join(roles) + instance.query(f"REVOKE SELECT({column}) ON tbl FROM {roles_str}") + for role in roles_to_columns: + if column in roles_to_columns[role]: + roles_to_columns[role].remove(column) + return True + + # Creates a role and grants it to one of the users. + def create_role(): + for role in ["R1", "R2", "R3"]: + if role not in roles: + instance.query(f"CREATE ROLE {role}") + roles.append(role) + if role not in roles_to_columns: + roles_to_columns[role] = [] + if "R1" not in users_to_roles["A"]: + instance.query("GRANT R1 TO A") + users_to_roles["A"].append("R1") + elif "R2" not in users_to_roles["B"]: + instance.query("GRANT R2 TO B") + users_to_roles["B"].append("R2") + elif "R3" not in users_to_roles["B"]: + instance.query("GRANT R3 TO R2") + users_to_roles["B"].append("R3") + elif "R3" not in users_to_roles["C"]: + instance.query("GRANT R3 TO C") + users_to_roles["C"].append("R3") + else: + return False + return True + + # Drops one of our roles. + def drop_role(): + if not roles: + return False + role = random.choice(roles) + instance.query(f"DROP ROLE {role}") + roles.remove(role) + for u in users_to_roles: + if role in users_to_roles[u]: + users_to_roles[u].remove(role) + del roles_to_columns[role] + if (role == "R2") and ("R3" in users_to_roles["B"]): + users_to_roles["B"].remove("R3") + return True + + # Modifies some grants or roles randomly. + def modify(): + while True: + rnd = random.random() + if rnd < 0.4: + if grant_column(): + break + elif rnd < 0.5: + if revoke_column(): + break + elif rnd < 0.9: + if create_role(): + break + else: + if drop_role(): + break + + def maybe_modify(): + if random.random() < 0.9: + modify() + modify() + + # Sleeping is necessary in this test because the role cache in ClickHouse has expiration timeout. + def maybe_sleep(): + if random.random() < 0.1: + # "role_cache_expiration_time_seconds" is set to 2 seconds in the test configuration. + # We need a sleep longer than that in this test sometimes. + seconds = random.random() * 5 + print(f"Sleeping {seconds} seconds") + time.sleep(seconds) + + # Main part of the test. + start_time = time.time() + end_time = start_time + test_time + + while time.time() < end_time: + check() + maybe_sleep() + maybe_modify() + maybe_sleep() + + check() + + instance.query("DROP USER " + ", ".join(users)) + instance.query("DROP ROLE " + ", ".join(roles)) + instance.query("DROP TABLE tbl") diff --git a/tests/integration/test_s3_imds/test_simple.py b/tests/integration/test_s3_imds/test_simple.py index 0dacac2b0b9..4884c824f99 100644 --- a/tests/integration/test_s3_imds/test_simple.py +++ b/tests/integration/test_s3_imds/test_simple.py @@ -56,7 +56,7 @@ def test_credentials_from_metadata(): ) expected_logs = [ - "Calling EC2MetadataService to get token failed, falling back to less secure way", + "Calling EC2MetadataService to get token failed, falling back to a less secure way", "Getting default credentials for ec2 instance from resolver:8080", "Calling EC2MetadataService resource, /latest/meta-data/iam/security-credentials returned credential string myrole", "Calling EC2MetadataService resource /latest/meta-data/iam/security-credentials/myrole", diff --git a/tests/integration/test_server_keep_alive/__init__.py b/tests/integration/test_server_keep_alive/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_server_keep_alive/configs/keep_alive_settings.xml b/tests/integration/test_server_keep_alive/configs/keep_alive_settings.xml new file mode 100644 index 00000000000..06e68044817 --- /dev/null +++ b/tests/integration/test_server_keep_alive/configs/keep_alive_settings.xml @@ -0,0 +1,4 @@ + + 3600 + 5 + diff --git a/tests/integration/test_server_keep_alive/test.py b/tests/integration/test_server_keep_alive/test.py new file mode 100644 index 00000000000..e550319b6df --- /dev/null +++ b/tests/integration/test_server_keep_alive/test.py @@ -0,0 +1,55 @@ +import logging +import pytest +import random +import requests + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance("node", main_configs=["configs/keep_alive_settings.xml"]) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + logging.info("Starting cluster...") + cluster.start() + logging.info("Cluster started") + + yield cluster + finally: + cluster.shutdown() + + +def test_max_keep_alive_requests_on_user_side(start_cluster): + # In this test we have `keep_alive_timeout` set to one hour to never trigger connection reset by timeout, `max_keep_alive_requests` is set to 5. + # We expect server to close connection after each 5 requests. We detect connection reset by change in src port. + # So the first 5 requests should come from the same port, the following 5 requests should come from another port. + + log_comments = [] + for _ in range(10): + rand_id = random.randint(0, 1000000) + log_comment = f"test_requests_with_keep_alive_{rand_id}" + log_comments.append(log_comment) + log_comments = sorted(log_comments) + + session = requests.Session() + for i in range(10): + session.get( + f"http://{node.ip_address}:8123/?query=select%201&log_comment={log_comments[i]}" + ) + + ports = node.query( + f""" + SYSTEM FLUSH LOGS; + + SELECT port + FROM system.query_log + WHERE log_comment IN ({", ".join(f"'{comment}'" for comment in log_comments)}) AND type = 'QueryFinish' + ORDER BY log_comment + """ + ).split("\n")[:-1] + + expected = 5 * [ports[0]] + [ports[5]] * 5 + + assert ports == expected diff --git a/tests/integration/test_server_reload/configs/default_passwd.xml b/tests/integration/test_server_reload/configs/default_passwd.xml index f79149e7e23..9d664cbf9c4 100644 --- a/tests/integration/test_server_reload/configs/default_passwd.xml +++ b/tests/integration/test_server_reload/configs/default_passwd.xml @@ -1,4 +1,4 @@ - + @@ -9,4 +9,4 @@ 123 - + diff --git a/tests/integration/test_server_reload/configs/overrides_from_zk.xml b/tests/integration/test_server_reload/configs/overrides_from_zk.xml index d420faa88a2..aa6105f6ebe 100644 --- a/tests/integration/test_server_reload/configs/overrides_from_zk.xml +++ b/tests/integration/test_server_reload/configs/overrides_from_zk.xml @@ -1,4 +1,4 @@ - + @@ -7,4 +7,4 @@ - + diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index 6fbe7634642..c1f518e45ce 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -5,6 +5,7 @@ import json import logging import os import io +import re import random import threading import time @@ -134,6 +135,7 @@ def test_create_table_connection_string(cluster): Engine = AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', 'test_create_connection_string', 'CSV') """, ) + azure_query(node, "DROP TABLE IF EXISTS test_create_table_conn_string") def test_create_table_account_string(cluster): @@ -143,6 +145,7 @@ def test_create_table_account_string(cluster): f"CREATE TABLE test_create_table_account_url (key UInt64, data String) Engine = AzureBlobStorage('{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}'," f"'cont', 'test_create_connection_string', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV')", ) + azure_query(node, "DROP TABLE IF EXISTS test_create_table_account_url") def test_simple_write_account_string(cluster): @@ -156,6 +159,7 @@ def test_simple_write_account_string(cluster): azure_query(node, "INSERT INTO test_simple_write VALUES (1, 'a')") print(get_azure_file_content("test_simple_write.csv", port)) assert get_azure_file_content("test_simple_write.csv", port) == '1,"a"\n' + azure_query(node, "DROP TABLE test_simple_write") def test_simple_write_connection_string(cluster): @@ -169,6 +173,7 @@ def test_simple_write_connection_string(cluster): azure_query(node, "INSERT INTO test_simple_write_connection_string VALUES (1, 'a')") print(get_azure_file_content("test_simple_write_c.csv", port)) assert get_azure_file_content("test_simple_write_c.csv", port) == '1,"a"\n' + azure_query(node, "DROP TABLE test_simple_write_connection_string") def test_simple_write_named_collection_1(cluster): @@ -184,7 +189,7 @@ def test_simple_write_named_collection_1(cluster): ) print(get_azure_file_content("test_simple_write_named.csv", port)) assert get_azure_file_content("test_simple_write_named.csv", port) == '1,"a"\n' - azure_query(node, "TRUNCATE TABLE test_simple_write_named_collection_1") + azure_query(node, "DROP TABLE test_simple_write_named_collection_1") def test_simple_write_named_collection_2(cluster): @@ -201,6 +206,7 @@ def test_simple_write_named_collection_2(cluster): ) print(get_azure_file_content("test_simple_write_named_2.csv", port)) assert get_azure_file_content("test_simple_write_named_2.csv", port) == '1,"a"\n' + azure_query(node, "DROP TABLE test_simple_write_named_collection_2") def test_partition_by(cluster): @@ -222,6 +228,7 @@ def test_partition_by(cluster): assert "1,2,3\n" == get_azure_file_content("test_3.csv", port) assert "3,2,1\n" == get_azure_file_content("test_1.csv", port) assert "78,43,45\n" == get_azure_file_content("test_45.csv", port) + azure_query(node, "DROP TABLE test_partitioned_write") def test_partition_by_string_column(cluster): @@ -242,6 +249,7 @@ def test_partition_by_string_column(cluster): assert '1,"foo/bar"\n' == get_azure_file_content("test_foo/bar.csv", port) assert '3,"йцук"\n' == get_azure_file_content("test_йцук.csv", port) assert '78,"你好"\n' == get_azure_file_content("test_你好.csv", port) + azure_query(node, "DROP TABLE test_partitioned_string_write") def test_partition_by_const_column(cluster): @@ -260,6 +268,7 @@ def test_partition_by_const_column(cluster): ) azure_query(node, f"INSERT INTO test_partitioned_const_write VALUES {values}") assert values_csv == get_azure_file_content("test_88.csv", port) + azure_query(node, "DROP TABLE test_partitioned_const_write") def test_truncate(cluster): @@ -275,6 +284,7 @@ def test_truncate(cluster): azure_query(node, "TRUNCATE TABLE test_truncate") with pytest.raises(Exception): print(get_azure_file_content("test_truncate.csv", port)) + azure_query(node, "DROP TABLE test_truncate") def test_simple_read_write(cluster): @@ -291,6 +301,7 @@ def test_simple_read_write(cluster): assert get_azure_file_content("test_simple_read_write.csv", port) == '1,"a"\n' print(azure_query(node, "SELECT * FROM test_simple_read_write")) assert azure_query(node, "SELECT * FROM test_simple_read_write") == "1\ta\n" + azure_query(node, "DROP TABLE test_simple_read_write") def test_create_new_files_on_insert(cluster): @@ -343,6 +354,7 @@ def test_overwrite(cluster): result = azure_query(node, f"select count() from test_overwrite") assert int(result) == 200 + azure_query(node, f"DROP TABLE test_overwrite") def test_insert_with_path_with_globs(cluster): @@ -355,6 +367,7 @@ def test_insert_with_path_with_globs(cluster): node.query_and_get_error( f"insert into table function test_insert_globs SELECT number, randomString(100) FROM numbers(500)" ) + azure_query(node, f"DROP TABLE test_insert_globs") def test_put_get_with_globs(cluster): @@ -363,6 +376,7 @@ def test_put_get_with_globs(cluster): node = cluster.instances["node"] # type: ClickHouseInstance table_format = "column1 UInt32, column2 UInt32, column3 UInt32" max_path = "" + used_names = [] for i in range(10): for j in range(10): path = "{}/{}_{}/{}.csv".format( @@ -371,6 +385,8 @@ def test_put_get_with_globs(cluster): max_path = max(path, max_path) values = f"({i},{j},{i + j})" + used_names.append(f"test_put_{i}_{j}") + azure_query( node, f"CREATE TABLE test_put_{i}_{j} ({table_format}) Engine = AzureBlobStorage(azure_conf2, " @@ -391,6 +407,9 @@ def test_put_get_with_globs(cluster): bucket="cont", max_path=max_path ) ] + azure_query(node, "DROP TABLE test_glob_select") + for name in used_names: + azure_query(node, f"DROP TABLE {name}") def test_azure_glob_scheherazade(cluster): @@ -399,12 +418,14 @@ def test_azure_glob_scheherazade(cluster): values = "(1, 1, 1)" nights_per_job = 1001 // 30 jobs = [] + used_names = [] for night in range(0, 1001, nights_per_job): def add_tales(start, end): for i in range(start, end): path = "night_{}/tale.csv".format(i) unique_num = random.randint(1, 10000) + used_names.append(f"test_scheherazade_{i}_{unique_num}") azure_query( node, f"CREATE TABLE test_scheherazade_{i}_{unique_num} ({table_format}) Engine = AzureBlobStorage(azure_conf2, " @@ -432,6 +453,9 @@ def test_azure_glob_scheherazade(cluster): ) query = "select count(), sum(column1), sum(column2), sum(column3) from test_glob_select_scheherazade" assert azure_query(node, query).splitlines() == ["1001\t1001\t1001\t1001"] + azure_query(node, "DROP TABLE test_glob_select_scheherazade") + for name in used_names: + azure_query(node, f"DROP TABLE {name}") @pytest.mark.parametrize( @@ -505,6 +529,8 @@ def test_schema_inference_no_globs(cluster): assert azure_query(node, query).splitlines() == [ "499500\t2890\t332833500\ttest_schema_inference_no_globs.csv\tcont/test_schema_inference_no_globs.csv" ] + azure_query(node, f"DROP TABLE test_schema_inference_src") + azure_query(node, f"DROP TABLE test_select_inference") def test_schema_inference_from_globs(cluster): @@ -513,6 +539,7 @@ def test_schema_inference_from_globs(cluster): node = cluster.instances["node"] # type: ClickHouseInstance table_format = "column1 UInt32, column2 UInt32, column3 UInt32" max_path = "" + used_names = [] for i in range(10): for j in range(10): path = "{}/{}_{}/{}.csv".format( @@ -520,6 +547,7 @@ def test_schema_inference_from_globs(cluster): ) max_path = max(path, max_path) values = f"({i},{j},{i + j})" + used_names.append(f"test_schema_{i}_{j}") azure_query( node, @@ -545,6 +573,9 @@ def test_schema_inference_from_globs(cluster): bucket="cont", max_path=max_path ) ] + azure_query(node, "DROP TABLE test_glob_select_inference") + for name in used_names: + azure_query(node, f"DROP TABLE {name}") def test_simple_write_account_string_table_function(cluster): @@ -594,7 +625,7 @@ def test_simple_write_named_collection_1_table_function(cluster): azure_query( node, - "TRUNCATE TABLE drop_table", + "DROP TABLE drop_table", ) @@ -605,6 +636,7 @@ def test_simple_write_named_collection_2_table_function(cluster): node, f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}'," f" container='cont', blob_path='test_simple_write_named_2_tf.csv', format='CSV', structure='key UInt64, data String') VALUES (1, 'a')", + settings={"azure_truncate_on_insert": 1}, ) print(get_azure_file_content("test_simple_write_named_2_tf.csv", port)) assert get_azure_file_content("test_simple_write_named_2_tf.csv", port) == '1,"a"\n' @@ -628,6 +660,7 @@ def test_put_get_with_globs_tf(cluster): node, f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}'," f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values}", + settings={"azure_truncate_on_insert": 1}, ) query = ( f"select sum(column1), sum(column2), sum(column3), min(_file), max(_path) from azureBlobStorage(azure_conf2, " @@ -648,7 +681,7 @@ def test_schema_inference_no_globs_tf(cluster): query = ( f"insert into table function azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', " f"container='cont', blob_path='test_schema_inference_no_globs_tf.csv', format='CSVWithNames', structure='{table_format}') " - f"SELECT number, toString(number), number * number FROM numbers(1000)" + f"SELECT number, toString(number), number * number FROM numbers(1000) SETTINGS azure_truncate_on_insert=1" ) azure_query(node, query) @@ -681,7 +714,7 @@ def test_schema_inference_from_globs_tf(cluster): f"insert into table function azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', " f"container='cont', blob_path='{path}', format='CSVWithNames', structure='{table_format}') VALUES {values}" ) - azure_query(node, query) + azure_query(node, query, settings={"azure_truncate_on_insert": 1}) query = ( f"select sum(column1), sum(column2), sum(column3), min(_file), max(_path) from azureBlobStorage(azure_conf2, " @@ -708,6 +741,7 @@ def test_partition_by_tf(cluster): f"INSERT INTO TABLE FUNCTION azureBlobStorage('{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', " f"'cont', '{filename}', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', " f"'CSV', 'auto', '{table_format}') PARTITION BY {partition_by} VALUES {values}", + settings={"azure_truncate_on_insert": 1}, ) assert "1,2,3\n" == get_azure_file_content("test_partition_tf_3.csv", port) @@ -727,6 +761,7 @@ def test_filter_using_file(cluster): f"INSERT INTO TABLE FUNCTION azureBlobStorage('{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', 'cont', '{filename}', " f"'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', 'auto', " f"'{table_format}') PARTITION BY {partition_by} VALUES {values}", + settings={"azure_truncate_on_insert": 1}, ) query = ( @@ -744,7 +779,7 @@ def test_read_subcolumns(cluster): node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_subcolumns.tsv', " f"'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto'," - f" 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3)", + f" 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3) SETTINGS azure_truncate_on_insert=1", ) azure_query( @@ -794,7 +829,7 @@ def test_read_subcolumn_time(cluster): node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_subcolumn_time.tsv', " f"'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto'," - f" 'a UInt32') select (42)", + f" 'a UInt32') select (42) SETTINGS azure_truncate_on_insert=1", ) res = node.query( @@ -825,6 +860,7 @@ def test_function_signatures(cluster): azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_signature.csv', '{account_name}', '{account_key}', 'CSV', 'auto', 'column1 UInt32') VALUES (1),(2),(3)", + settings={"azure_truncate_on_insert": 1}, ) # " - connection_string, container_name, blobpath\n" @@ -939,11 +975,13 @@ def test_union_schema_inference_mode(cluster): azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_union_schema_inference1.jsonl', '{account_name}', '{account_key}', 'JSONEachRow', 'auto', 'a UInt32') VALUES (1)", + settings={"azure_truncate_on_insert": 1}, ) azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_union_schema_inference2.jsonl', '{account_name}', '{account_key}', 'JSONEachRow', 'auto', 'b UInt32') VALUES (2)", + settings={"azure_truncate_on_insert": 1}, ) node.query("system drop schema cache for azure") @@ -981,6 +1019,7 @@ def test_union_schema_inference_mode(cluster): azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_union_schema_inference3.jsonl', '{account_name}', '{account_key}', 'CSV', 'auto', 's String') VALUES ('Error')", + settings={"azure_truncate_on_insert": 1}, ) error = azure_query( @@ -1002,7 +1041,7 @@ def test_schema_inference_cache(cluster): azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_cache0.jsonl', '{account_name}', '{account_key}') " - f"select * from numbers(100)", + f"select * from numbers(100) SETTINGS azure_truncate_on_insert=1", ) time.sleep(1) @@ -1209,19 +1248,19 @@ def test_filtering_by_file_or_path(cluster): azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}','cont', 'test_filter1.tsv', 'devstoreaccount1', " - f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 1", + f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 1 SETTINGS azure_truncate_on_insert=1", ) azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}','cont', 'test_filter2.tsv', 'devstoreaccount1', " - f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 2", + f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 2 SETTINGS azure_truncate_on_insert=1", ) azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_filter3.tsv', 'devstoreaccount1', " - f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 3", + f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 3 SETTINGS azure_truncate_on_insert=1", ) node.query( @@ -1233,7 +1272,7 @@ def test_filtering_by_file_or_path(cluster): node.query("SYSTEM FLUSH LOGS") result = node.query( - f"SELECT ProfileEvents['EngineFileLikeReadFiles'] FROM system.query_log WHERE query ilike '%select%azure%test_filter%' AND type='QueryFinish'" + f"SELECT ProfileEvents['EngineFileLikeReadFiles'] FROM system.query_log WHERE query ilike '%select%azure%test_filter%' AND type='QueryFinish' ORDER BY event_time_microseconds DESC LIMIT 1" ) assert int(result) == 1 @@ -1245,19 +1284,19 @@ def test_size_virtual_column(cluster): azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}','cont', 'test_size_virtual_column1.tsv', 'devstoreaccount1', " - f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 1", + f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 1 SETTINGS azure_truncate_on_insert=1", ) azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}','cont', 'test_size_virtual_column2.tsv', 'devstoreaccount1', " - f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 11", + f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 11 SETTINGS azure_truncate_on_insert=1", ) azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_size_virtual_column3.tsv', 'devstoreaccount1', " - f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 111", + f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 111 SETTINGS azure_truncate_on_insert=1", ) result = azure_query( @@ -1280,7 +1319,7 @@ def test_format_detection(cluster): account_key = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==" azure_query( node, - f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_format_detection0', '{account_name}', '{account_key}', 'JSONEachRow', 'auto', 'x UInt64, y String') select number as x, 'str_' || toString(number) from numbers(0)", + f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_format_detection0', '{account_name}', '{account_key}', 'JSONEachRow', 'auto', 'x UInt64, y String') select number as x, 'str_' || toString(number) from numbers(0) SETTINGS azure_truncate_on_insert=1", ) azure_query( @@ -1350,7 +1389,7 @@ def test_write_to_globbed_partitioned_path(cluster): account_key = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==" error = azure_query( node, - f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_data_*_{{_partition_id}}', '{account_name}', '{account_key}', 'CSV', 'auto', 'x UInt64') partition by 42 select 42", + f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_data_*_{{_partition_id}}', '{account_name}', '{account_key}', 'CSV', 'auto', 'x UInt64') partition by 42 select 42 SETTINGS azure_truncate_on_insert=1", expect_error="true", ) @@ -1462,3 +1501,94 @@ def test_insert_create_new_file(cluster): assert TSV(res) == TSV( "test_create_new_file.csv\t1\ntest_create_new_file.1.csv\t2\n" ) + + +def test_hive_partitioning_with_one_parameter(cluster): + # type: (ClickHouseCluster) -> None + node = cluster.instances["node"] # type: ClickHouseInstance + table_format = "column1 String, column2 String" + values = f"('Elizabeth', 'Gordon')" + path = "a/column1=Elizabeth/sample.csv" + + azure_query( + node, + f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}'," + f" container='cont', blob_path='{path}', format='CSVWithNames', compression='auto', structure='{table_format}') VALUES {values}", + settings={"azure_truncate_on_insert": 1}, + ) + + query = ( + f"SELECT column2, _file, _path, column1 FROM azureBlobStorage(azure_conf2, " + f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " + f"blob_path='{path}', format='CSVWithNames', structure='{table_format}')" + ) + assert azure_query( + node, query, settings={"use_hive_partitioning": 1} + ).splitlines() == [ + "Gordon\tsample.csv\t{bucket}/{max_path}\tElizabeth".format( + bucket="cont", max_path=path + ) + ] + + query = ( + f"SELECT column2 FROM azureBlobStorage(azure_conf2, " + f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " + f"blob_path='{path}', format='CSVWithNames', structure='{table_format}');" + ) + assert azure_query( + node, query, settings={"use_hive_partitioning": 1} + ).splitlines() == ["Gordon"] + + +def test_hive_partitioning_with_all_parameters(cluster): + # type: (ClickHouseCluster) -> None + node = cluster.instances["node"] # type: ClickHouseInstance + table_format = "column1 String, column2 String" + values_1 = f"('Elizabeth', 'Gordon')" + values_2 = f"('Emilia', 'Gregor')" + path = "a/column1=Elizabeth/column2=Gordon/sample.csv" + + azure_query( + node, + f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}'," + f" container='cont', blob_path='{path}', format='CSVWithNames', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2}", + settings={"azure_truncate_on_insert": 1}, + ) + + query = ( + f"SELECT column1, column2, _file, _path FROM azureBlobStorage(azure_conf2, " + f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " + f"blob_path='{path}', format='CSVWithNames', structure='{table_format}');" + ) + pattern = r"DB::Exception: Cannot use hive partitioning for file" + + with pytest.raises(Exception, match=pattern): + azure_query(node, query, settings={"use_hive_partitioning": 1}) + + +def test_hive_partitioning_without_setting(cluster): + # type: (ClickHouseCluster) -> None + node = cluster.instances["node"] # type: ClickHouseInstance + table_format = "column1 String, column2 String" + values_1 = f"('Elizabeth', 'Gordon')" + values_2 = f"('Emilia', 'Gregor')" + path = "a/column1=Elizabeth/column2=Gordon/column3=Gordon/sample.csv" + + azure_query( + node, + f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}'," + f" container='cont', blob_path='{path}', format='CSVWithNames', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2}", + settings={"azure_truncate_on_insert": 1}, + ) + + query = ( + f"SELECT column1, column2, _file, _path, column3 FROM azureBlobStorage(azure_conf2, " + f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " + f"blob_path='{path}', format='CSVWithNames', structure='{table_format}');" + ) + pattern = re.compile( + r"DB::Exception: Unknown expression identifier '.*' in scope.*", re.DOTALL + ) + + with pytest.raises(Exception, match=pattern): + azure_query(node, query, settings={"use_hive_partitioning": 0}) diff --git a/tests/integration/test_storage_azure_blob_storage/test_cluster.py b/tests/integration/test_storage_azure_blob_storage/test_cluster.py index 6c5e2d20ca5..04baf007c69 100644 --- a/tests/integration/test_storage_azure_blob_storage/test_cluster.py +++ b/tests/integration/test_storage_azure_blob_storage/test_cluster.py @@ -72,6 +72,7 @@ def test_select_all(cluster): f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_cluster_select_all.csv', 'devstoreaccount1'," f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', 'auto', 'key UInt64, data String') " f"VALUES (1, 'a'), (2, 'b')", + settings={"azure_truncate_on_insert": 1}, ) print(get_azure_file_content("test_cluster_select_all.csv", port)) @@ -101,6 +102,7 @@ def test_count(cluster): f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_cluster_count.csv', 'devstoreaccount1', " f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', " f"'auto', 'key UInt64') VALUES (1), (2)", + settings={"azure_truncate_on_insert": 1}, ) print(get_azure_file_content("test_cluster_count.csv", port)) @@ -129,6 +131,7 @@ def test_union_all(cluster): f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_parquet_union_all', 'devstoreaccount1', " f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'Parquet', " f"'auto', 'a Int32, b String') VALUES (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd')", + settings={"azure_truncate_on_insert": 1}, ) pure_azure = azure_query( @@ -180,6 +183,7 @@ def test_skip_unavailable_shards(cluster): f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_skip_unavailable.csv', 'devstoreaccount1', " f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', " f"'auto', 'a UInt64') VALUES (1), (2)", + settings={"azure_truncate_on_insert": 1}, ) result = azure_query( node, @@ -200,6 +204,7 @@ def test_unset_skip_unavailable_shards(cluster): f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_unset_skip_unavailable.csv', 'devstoreaccount1', " f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', " f"'auto', 'a UInt64') VALUES (1), (2)", + settings={"azure_truncate_on_insert": 1}, ) result = azure_query( node, @@ -218,6 +223,7 @@ def test_cluster_with_named_collection(cluster): f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_cluster_with_named_collection.csv', 'devstoreaccount1', " f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', " f"'auto', 'a UInt64') VALUES (1), (2)", + settings={"azure_truncate_on_insert": 1}, ) pure_azure = azure_query( @@ -249,6 +255,7 @@ def test_partition_parallel_reading_with_cluster(cluster): f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', '{filename}', 'devstoreaccount1', " f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', 'auto', '{table_format}') " f"PARTITION BY {partition_by} VALUES {values}", + settings={"azure_truncate_on_insert": 1}, ) assert "1,2,3\n" == get_azure_file_content("test_tf_3.csv", port) @@ -272,12 +279,12 @@ def test_format_detection(cluster): azure_query( node, - f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_format_detection0', '{account_name}', '{account_key}', 'JSONEachRow', 'auto', 'x UInt32, y String') select number as x, 'str_' || toString(number) from numbers(10)", + f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_format_detection0', '{account_name}', '{account_key}', 'JSONEachRow', 'auto', 'x UInt32, y String') select number as x, 'str_' || toString(number) from numbers(10) SETTINGS azure_truncate_on_insert=1", ) azure_query( node, - f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_format_detection1', '{account_name}', '{account_key}', 'JSONEachRow', 'auto', 'x UInt32, y String') select number as x, 'str_' || toString(number) from numbers(10, 10)", + f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_format_detection1', '{account_name}', '{account_key}', 'JSONEachRow', 'auto', 'x UInt32, y String') select number as x, 'str_' || toString(number) from numbers(10, 10) SETTINGS azure_truncate_on_insert=1", ) expected_desc_result = azure_query( diff --git a/tests/integration/test_storage_delta/test.py b/tests/integration/test_storage_delta/test.py index 054b79ff6fe..75a4b6cc221 100644 --- a/tests/integration/test_storage_delta/test.py +++ b/tests/integration/test_storage_delta/test.py @@ -464,7 +464,7 @@ def test_restart_broken(started_cluster): """ SELECT value FROM system.metrics - WHERE metric = 'S3DiskNoKeyErrors' + WHERE metric = 'DiskS3NoSuchKeyErrors' """ ).strip() ) @@ -572,7 +572,7 @@ def test_partition_columns(started_cluster): "test" + str(i), datetime.strptime(f"2000-01-0{i}", "%Y-%m-%d"), i, - False, + False if i % 2 == 0 else True, ) ] df = spark.createDataFrame(data=data, schema=schema) @@ -622,15 +622,15 @@ def test_partition_columns(started_cluster): ENGINE=DeltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123')""" ) assert ( - """1 test1 2000-01-01 1 false + """1 test1 2000-01-01 1 true 2 test2 2000-01-02 2 false -3 test3 2000-01-03 3 false +3 test3 2000-01-03 3 true 4 test4 2000-01-04 4 false -5 test5 2000-01-05 5 false +5 test5 2000-01-05 5 true 6 test6 2000-01-06 6 false -7 test7 2000-01-07 7 false +7 test7 2000-01-07 7 true 8 test8 2000-01-08 8 false -9 test9 2000-01-09 9 false""" +9 test9 2000-01-09 9 true""" == instance.query(f"SELECT * FROM {TABLE_NAME} ORDER BY b").strip() ) @@ -670,7 +670,7 @@ test9 2000-01-09 9""" "test" + str(i), datetime.strptime(f"2000-01-{i}", "%Y-%m-%d"), i, - False, + False if i % 2 == 0 else True, ) ] df = spark.createDataFrame(data=data, schema=schema) @@ -696,23 +696,23 @@ test9 2000-01-09 9""" assert result == num_rows * 2 assert ( - """1 test1 2000-01-01 1 false + """1 test1 2000-01-01 1 true 2 test2 2000-01-02 2 false -3 test3 2000-01-03 3 false +3 test3 2000-01-03 3 true 4 test4 2000-01-04 4 false -5 test5 2000-01-05 5 false +5 test5 2000-01-05 5 true 6 test6 2000-01-06 6 false -7 test7 2000-01-07 7 false +7 test7 2000-01-07 7 true 8 test8 2000-01-08 8 false -9 test9 2000-01-09 9 false +9 test9 2000-01-09 9 true 10 test10 2000-01-10 10 false -11 test11 2000-01-11 11 false +11 test11 2000-01-11 11 true 12 test12 2000-01-12 12 false -13 test13 2000-01-13 13 false +13 test13 2000-01-13 13 true 14 test14 2000-01-14 14 false -15 test15 2000-01-15 15 false +15 test15 2000-01-15 15 true 16 test16 2000-01-16 16 false -17 test17 2000-01-17 17 false +17 test17 2000-01-17 17 true 18 test18 2000-01-18 18 false""" == instance.query( f""" diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index aef5ddb3675..7a92e8adb0d 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -3,7 +3,9 @@ import os import pytest import uuid import time +import re from helpers.cluster import ClickHouseCluster, is_arm +from helpers.client import QueryRuntimeException from helpers.test_tools import TSV from pyhdfs import HdfsClient @@ -1255,6 +1257,43 @@ def test_respect_object_existence_on_partitioned_write(started_cluster): assert int(result) == 44 +def test_hive_partitioning_with_one_parameter(started_cluster): + hdfs_api = started_cluster.hdfs_api + hdfs_api.write_data( + f"/column0=Elizabeth/file_1", f"column0,column1\nElizabeth,Gordon\n" + ) + assert ( + hdfs_api.read_data(f"/column0=Elizabeth/file_1") + == f"column0,column1\nElizabeth,Gordon\n" + ) + + r = node1.query( + "SELECT column0 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/file_1', 'CSVWithNames')", + settings={"use_hive_partitioning": 1}, + ) + assert r == f"Elizabeth\n" + + +def test_hive_partitioning_without_setting(started_cluster): + hdfs_api = started_cluster.hdfs_api + hdfs_api.write_data( + f"/column0=Elizabeth/column1=Gordon/parquet_2", f"Elizabeth\tGordon\n" + ) + assert ( + hdfs_api.read_data(f"/column0=Elizabeth/column1=Gordon/parquet_2") + == f"Elizabeth\tGordon\n" + ) + pattern = re.compile( + r"DB::Exception: Unknown expression identifier '.*' in scope.*", re.DOTALL + ) + + with pytest.raises(QueryRuntimeException, match=pattern): + node1.query( + f"SELECT column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');", + settings={"use_hive_partitioning": 0}, + ) + + if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") diff --git a/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/string_key_value.capnp b/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/string_key_value.capnp new file mode 100644 index 00000000000..4f3eabe22f0 --- /dev/null +++ b/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/string_key_value.capnp @@ -0,0 +1,6 @@ +@0x99f75f775fe63dae; + +struct StringKeyValuePair { + key@0 : Text; + value@1 : Text; +} diff --git a/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/string_key_value.format b/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/string_key_value.format new file mode 100644 index 00000000000..83dff6ce401 --- /dev/null +++ b/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/string_key_value.format @@ -0,0 +1 @@ +(key = ${key:CSV}, value = ${value:CSV}) diff --git a/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/string_key_value.proto b/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/string_key_value.proto new file mode 100644 index 00000000000..71905c63bdf --- /dev/null +++ b/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/string_key_value.proto @@ -0,0 +1,6 @@ +syntax = "proto3"; + +message StringKeyValuePair { + string key = 1; + string value = 2; +} diff --git a/tests/integration/test_storage_kafka/configs/kafka.xml b/tests/integration/test_storage_kafka/configs/kafka.xml index b10db879b72..a846fdbb295 100644 --- a/tests/integration/test_storage_kafka/configs/kafka.xml +++ b/tests/integration/test_storage_kafka/configs/kafka.xml @@ -48,7 +48,7 @@ - 30001 + 30001
60001 diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index 37457e00701..bef90e1b9d3 100644 --- a/tests/integration/test_storage_kafka/test.py +++ b/tests/integration/test_storage_kafka/test.py @@ -29,6 +29,7 @@ from kafka import KafkaAdminClient, KafkaProducer, KafkaConsumer, BrokerConnecti from kafka.protocol.admin import DescribeGroupsRequest_v1 from kafka.protocol.group import MemberAssignment from kafka.admin import NewTopic +from contextlib import contextmanager # protoc --version @@ -46,6 +47,13 @@ if is_arm(): # TODO: add test for run-time offset update in CH, if we manually update it on Kafka side. # TODO: add test for SELECT LIMIT is working. + +KAFKA_TOPIC_OLD = "old_t" +KAFKA_CONSUMER_GROUP_OLD = "old_cg" +KAFKA_TOPIC_NEW = "new_t" +KAFKA_CONSUMER_GROUP_NEW = "new_cg" + + cluster = ClickHouseCluster(__file__) instance = cluster.add_instance( "instance", @@ -55,10 +63,10 @@ instance = cluster.add_instance( with_zookeeper=True, # For Replicated Table macros={ "kafka_broker": "kafka1", - "kafka_topic_old": "old", - "kafka_group_name_old": "old", - "kafka_topic_new": "new", - "kafka_group_name_new": "new", + "kafka_topic_old": KAFKA_TOPIC_OLD, + "kafka_group_name_old": KAFKA_CONSUMER_GROUP_OLD, + "kafka_topic_new": KAFKA_TOPIC_NEW, + "kafka_group_name_new": KAFKA_CONSUMER_GROUP_NEW, "kafka_client_id": "instance", "kafka_format_json_each_row": "JSONEachRow", }, @@ -142,6 +150,44 @@ def kafka_delete_topic(admin_client, topic, max_retries=50): raise Exception(f"Failed to delete topics {topic}, {result}") +@contextmanager +def kafka_topic( + admin_client, + topic_name, + num_partitions=1, + replication_factor=1, + max_retries=50, + config=None, +): + kafka_create_topic( + admin_client, + topic_name, + num_partitions, + replication_factor, + max_retries, + config, + ) + try: + yield None + finally: + # Code to release resource, e.g.: + kafka_delete_topic(admin_client, topic_name, max_retries) + + +@contextmanager +def existing_kafka_topic(admin_client, topic_name, max_retries=50): + try: + yield None + finally: + kafka_delete_topic(admin_client, topic_name, max_retries) + + +def get_admin_client(kafka_cluster): + return KafkaAdminClient( + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) + ) + + def kafka_produce(kafka_cluster, topic, messages, timestamp=None, retries=15): logging.debug( "kafka_produce server:{}:{} topic:{}".format( @@ -161,7 +207,7 @@ def kafka_producer_send_heartbeat_msg(max_retries=50): kafka_produce(kafka_cluster, "test_heartbeat_topic", ["test"], retries=max_retries) -def kafka_consume(kafka_cluster, topic, needDecode=True, timestamp=0): +def kafka_consume(kafka_cluster, topic, need_decode=True, timestamp=0): consumer = KafkaConsumer( bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port), auto_offset_reset="earliest", @@ -171,7 +217,7 @@ def kafka_consume(kafka_cluster, topic, needDecode=True, timestamp=0): if toppar.topic == topic: for message in messages: assert timestamp == 0 or message.timestamp / 1000 == timestamp - if needDecode: + if need_decode: yield message.value.decode() else: yield message.value @@ -196,7 +242,33 @@ def kafka_produce_protobuf_messages(kafka_cluster, topic, start_index, num_messa logging.debug(("Produced {} messages for topic {}".format(num_messages, topic))) -def kafka_produce_protobuf_messages_no_delimeters( +def kafka_consume_with_retry( + kafka_cluster, + topic, + expected_messages, + need_decode=True, + timestamp=0, + retry_count=20, + sleep_time=0.1, +): + messages = [] + try_count = 0 + while try_count < retry_count: + try_count += 1 + messages.extend( + kafka_consume( + kafka_cluster, topic, need_decode=need_decode, timestamp=timestamp + ) + ) + if len(messages) == expected_messages: + break + time.sleep(sleep_time) + if len(messages) != expected_messages: + raise Exception(f"Got only {len(messages)} messages") + return messages + + +def kafka_produce_protobuf_messages_no_delimiters( kafka_cluster, topic, start_index, num_messages ): data = "" @@ -284,10 +356,99 @@ def avro_confluent_message(schema_registry_client, value): return serializer.encode_record_with_schema("test_subject", schema, value) +def create_settings_string(settings): + if settings is None: + return "" + + def format_value(value): + if isinstance(value, str): + return f"'{value}'" + elif isinstance(value, bool): + return str(int(value)) + return str(value) + + settings_string = "SETTINGS " + keys = settings.keys() + first_key = next(iter(settings)) + settings_string += str(first_key) + " = " + format_value(settings[first_key]) + for key in keys: + if key == first_key: + continue + settings_string += ", " + str(key) + " = " + format_value(settings[key]) + return settings_string + + +def generate_old_create_table_query( + table_name, + columns_def, + database="test", + brokers="{kafka_broker}:19092", + topic_list="{kafka_topic_new}", + consumer_group="{kafka_group_name_new}", + format="{kafka_format_json_each_row}", + row_delimiter="\\n", + keeper_path=None, # it is not used, but it is easier to handle keeper_path and replica_name like this + replica_name=None, + settings=None, +): + settings_string = create_settings_string(settings) + query = f"""CREATE TABLE {database}.{table_name} ({columns_def}) ENGINE = Kafka('{brokers}', '{topic_list}', '{consumer_group}', '{format}', '{row_delimiter}') +{settings_string}""" + logging.debug(f"Generated old create query: {query}") + return query + + +def generate_new_create_table_query( + table_name, + columns_def, + database="test", + brokers="{kafka_broker}:19092", + topic_list="{kafka_topic_new}", + consumer_group="{kafka_group_name_new}", + format="{kafka_format_json_each_row}", + row_delimiter="\\n", + keeper_path=None, + replica_name=None, + settings=None, +): + if settings is None: + settings = {} + if keeper_path is None: + keeper_path = f"/clickhouse/{{database}}/{table_name}" + if replica_name is None: + replica_name = "r1" + settings["kafka_keeper_path"] = keeper_path + settings["kafka_replica_name"] = replica_name + settings_string = create_settings_string(settings) + query = f"""CREATE TABLE {database}.{table_name} ({columns_def}) ENGINE = Kafka('{brokers}', '{topic_list}', '{consumer_group}', '{format}', '{row_delimiter}') +{settings_string} +SETTINGS allow_experimental_kafka_offsets_storage_in_keeper=1""" + logging.debug(f"Generated new create query: {query}") + return query + + +def must_use_thread_per_consumer(generator): + if generator == generate_old_create_table_query: + return False + if generator == generate_new_create_table_query: + return True + raise Exception("Unexpected generator") + + +def get_topic_postfix(generator): + if generator == generate_old_create_table_query: + return "old" + if generator == generate_new_create_table_query: + return "new" + raise Exception("Unexpected generator") + + # Tests - - -def test_kafka_column_types(kafka_cluster): +@pytest.mark.parametrize( + "create_query_generator, do_direct_read", + [(generate_old_create_table_query, True), (generate_new_create_table_query, False)], +) +def test_kafka_column_types(kafka_cluster, create_query_generator, do_direct_read): def assert_returned_exception(e): assert e.value.returncode == 36 assert ( @@ -297,57 +458,14 @@ def test_kafka_column_types(kafka_cluster): # check column with DEFAULT expression with pytest.raises(QueryRuntimeException) as exception: - instance.query( - """ - CREATE TABLE test.kafka (a Int, b Int DEFAULT 0) - ENGINE = Kafka('{kafka_broker}:19092', '{kafka_topic_new}', '{kafka_group_name_new}', '{kafka_format_json_each_row}', '\\n') - """ - ) + instance.query(create_query_generator("kafka", "a Int, b Int DEFAULT 0")) assert_returned_exception(exception) # check EPHEMERAL with pytest.raises(QueryRuntimeException) as exception: - instance.query( - """ - CREATE TABLE test.kafka (a Int, b Int EPHEMERAL) - ENGINE = Kafka('{kafka_broker}:19092', '{kafka_topic_new}', '{kafka_group_name_new}', '{kafka_format_json_each_row}', '\\n') - """ - ) + instance.query(create_query_generator("kafka", "a Int, b Int EPHEMERAL")) assert_returned_exception(exception) - # check ALIAS - instance.query( - """ - CREATE TABLE test.kafka (a Int, b String Alias toString(a)) - ENGINE = Kafka('{kafka_broker}:19092', '{kafka_topic_new}', '{kafka_group_name_new}', '{kafka_format_json_each_row}', '\\n') - SETTINGS kafka_commit_on_select = 1; - """ - ) - messages = [] - for i in range(5): - messages.append(json.dumps({"a": i})) - kafka_produce(kafka_cluster, "new", messages) - result = "" - expected = TSV( - """ -0\t0 -1\t1 -2\t2 -3\t3 -4\t4 - """ - ) - retries = 50 - while retries > 0: - result += instance.query("SELECT a, b FROM test.kafka", ignore_error=True) - if TSV(result) == expected: - break - retries -= 1 - - assert TSV(result) == expected - - instance.query("DROP TABLE test.kafka SYNC") - # check MATERIALIZED with pytest.raises(QueryRuntimeException) as exception: instance.query( @@ -358,6 +476,41 @@ def test_kafka_column_types(kafka_cluster): ) assert_returned_exception(exception) + if do_direct_read: + # check ALIAS + instance.query( + create_query_generator( + "kafka", + "a Int, b String Alias toString(a)", + settings={"kafka_commit_on_select": True}, + ) + ) + messages = [] + for i in range(5): + messages.append(json.dumps({"a": i})) + kafka_produce(kafka_cluster, KAFKA_TOPIC_NEW, messages) + result = "" + expected = TSV( + """ + 0\t0 + 1\t1 + 2\t2 + 3\t3 + 4\t4 + """ + ) + retries = 50 + while retries > 0: + result += instance.query("SELECT a, b FROM test.kafka", ignore_error=True) + if TSV(result) == expected: + break + retries -= 1 + time.sleep(0.5) + + assert TSV(result) == expected + + instance.query("DROP TABLE test.kafka SYNC") + def test_kafka_settings_old_syntax(kafka_cluster): assert TSV( @@ -366,13 +519,13 @@ def test_kafka_settings_old_syntax(kafka_cluster): ignore_error=True, ) ) == TSV( - """kafka_broker kafka1 + f"""kafka_broker kafka1 kafka_client_id instance kafka_format_json_each_row JSONEachRow -kafka_group_name_new new -kafka_group_name_old old -kafka_topic_new new -kafka_topic_old old +kafka_group_name_new {KAFKA_CONSUMER_GROUP_NEW} +kafka_group_name_old {KAFKA_CONSUMER_GROUP_OLD} +kafka_topic_new new_t +kafka_topic_old old_t """ ) @@ -389,7 +542,7 @@ kafka_topic_old old messages = [] for i in range(50): messages.append(json.dumps({"key": i, "value": i})) - kafka_produce(kafka_cluster, "old", messages) + kafka_produce(kafka_cluster, KAFKA_TOPIC_OLD, messages) result = "" while True: @@ -399,7 +552,7 @@ kafka_topic_old old kafka_check_result(result, True) - members = describe_consumer_group(kafka_cluster, "old") + members = describe_consumer_group(kafka_cluster, KAFKA_CONSUMER_GROUP_OLD) assert members[0]["client_id"] == "ClickHouse-instance-test-kafka" # text_desc = kafka_cluster.exec_in_container(kafka_cluster.get_container_id('kafka1'),"kafka-consumer-groups --bootstrap-server localhost:9092 --describe --members --group old --verbose")) @@ -423,16 +576,16 @@ def test_kafka_settings_new_syntax(kafka_cluster): messages = [] for i in range(25): messages.append(json.dumps({"key": i, "value": i})) - kafka_produce(kafka_cluster, "new", messages) + kafka_produce(kafka_cluster, KAFKA_TOPIC_NEW, messages) # Insert couple of malformed messages. - kafka_produce(kafka_cluster, "new", ["}{very_broken_message,"]) - kafka_produce(kafka_cluster, "new", ["}another{very_broken_message,"]) + kafka_produce(kafka_cluster, KAFKA_TOPIC_NEW, ["}{very_broken_message,"]) + kafka_produce(kafka_cluster, KAFKA_TOPIC_NEW, ["}another{very_broken_message,"]) messages = [] for i in range(25, 50): messages.append(json.dumps({"key": i, "value": i})) - kafka_produce(kafka_cluster, "new", messages) + kafka_produce(kafka_cluster, KAFKA_TOPIC_NEW, messages) result = "" while True: @@ -442,7 +595,7 @@ def test_kafka_settings_new_syntax(kafka_cluster): kafka_check_result(result, True) - members = describe_consumer_group(kafka_cluster, "new") + members = describe_consumer_group(kafka_cluster, KAFKA_CONSUMER_GROUP_NEW) assert members[0]["client_id"] == "instance test 1234" @@ -520,12 +673,13 @@ def test_kafka_json_as_string(kafka_cluster): ) -def test_kafka_formats(kafka_cluster): +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_kafka_formats(kafka_cluster, create_query_generator): schema_registry_client = CachedSchemaRegistryClient( - "http://localhost:{}".format(kafka_cluster.schema_registry_port) - ) - admin_client = KafkaAdminClient( - bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) + {"url": f"http://localhost:{kafka_cluster.schema_registry_port}"} ) # data was dumped from clickhouse itself in a following manner @@ -649,7 +803,7 @@ def test_kafka_formats(kafka_cluster): '(id = 1, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 2, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 3, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 4, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 5, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 6, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 7, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 8, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 9, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 10, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 11, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 12, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 13, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 14, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 15, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)', '(id = 0, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)', ], - "extra_settings": ", format_template_row='template_row.format'", + "extra_settings": {"format_template_row": "template_row.format"}, }, "Regexp": { "data_sample": [ @@ -660,7 +814,10 @@ def test_kafka_formats(kafka_cluster): # On empty message exception happens: Line "" doesn't match the regexp.: (at row 1) # /src/Processors/Formats/Impl/RegexpRowInputFormat.cpp:140: DB::RegexpRowInputFormat::readRow(std::__1::vector::mutable_ptr, std::__1::allocator::mutable_ptr > >&, DB::RowReadExtension&) @ 0x1df82fcb in /usr/bin/clickhouse ], - "extra_settings": r", format_regexp='\(id = (.+?), blockNo = (.+?), val1 = \"(.+?)\", val2 = (.+?), val3 = (.+?)\)', format_regexp_escaping_rule='Escaped'", + "extra_settings": { + "format_regexp": r"\(id = (.+?), blockNo = (.+?), val1 = \"(.+?)\", val2 = (.+?), val3 = (.+?)\)", + "format_regexp_escaping_rule": "Escaped", + }, }, ## BINARY FORMATS # dumped with @@ -732,7 +889,7 @@ def test_kafka_formats(kafka_cluster): # /src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp:25: DB::ProtobufRowInputFormat::readRow(std::__1::vector::mutable_ptr, std::__1::allocator::mutable_ptr > >&, DB::RowReadExtension&) @ 0x1df4cc71 in /usr/bin/clickhouse # /src/Processors/Formats/IRowInputFormat.cpp:64: DB::IRowInputFormat::generate() @ 0x1de727cf in /usr/bin/clickhouse ], - "extra_settings": ", kafka_schema='test:TestMessage'", + "extra_settings": {"kafka_schema": "test:TestMessage"}, }, "ORC": { "data_sample": [ @@ -756,7 +913,7 @@ def test_kafka_formats(kafka_cluster): # /src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp:212: DB::CapnProtoRowInputFormat::readMessage() @ 0x1ded1cab in /usr/bin/clickhouse # /src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp:241: DB::CapnProtoRowInputFormat::readRow(std::__1::vector::mutable_ptr, std::__1::allocator::mutable_ptr > >&, DB::RowReadExtension&) @ 0x1ded205d in /usr/bin/clickhouse ], - "extra_settings": ", kafka_schema='test:TestRecordStruct'", + "extra_settings": {"kafka_schema": "test:TestRecordStruct"}, }, "Parquet": { "data_sample": [ @@ -791,9 +948,12 @@ def test_kafka_formats(kafka_cluster): {"id": 0, "blockNo": 0, "val1": str("AM"), "val2": 0.5, "val3": 1}, ), ], - "extra_settings": ", format_avro_schema_registry_url='http://{}:{}'".format( - kafka_cluster.schema_registry_host, kafka_cluster.schema_registry_port - ), + "extra_settings": { + "format_avro_schema_registry_url": "http://{}:{}".format( + kafka_cluster.schema_registry_host, + kafka_cluster.schema_registry_port, + ) + }, "supports_empty_value": True, }, "Avro": { @@ -837,40 +997,41 @@ def test_kafka_formats(kafka_cluster): }, } + topic_postfix = str(hash(create_query_generator)) for format_name, format_opts in list(all_formats.items()): - logging.debug(("Set up {}".format(format_name))) - topic_name = "format_tests_{}".format(format_name) + logging.debug(f"Set up {format_name}") + topic_name = f"format_tests_{format_name}-{topic_postfix}" data_sample = format_opts["data_sample"] data_prefix = [] # prepend empty value when supported if format_opts.get("supports_empty_value", False): data_prefix = data_prefix + [""] kafka_produce(kafka_cluster, topic_name, data_prefix + data_sample) + + extra_settings = format_opts.get("extra_settings") or {} + extra_settings["kafka_flush_interval_ms"] = 1000 + instance.query( """ DROP TABLE IF EXISTS test.kafka_{format_name}; - CREATE TABLE test.kafka_{format_name} ( - id Int64, - blockNo UInt16, - val1 String, - val2 Float32, - val3 UInt8 - ) ENGINE = Kafka() - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = '{topic_name}', - kafka_group_name = '{topic_name}_group', - kafka_format = '{format_name}', - kafka_flush_interval_ms = 1000 {extra_settings}; + {create_query}; DROP TABLE IF EXISTS test.kafka_{format_name}_mv; - CREATE MATERIALIZED VIEW test.kafka_{format_name}_mv Engine=Log AS + CREATE MATERIALIZED VIEW test.kafka_{format_name}_mv ENGINE=MergeTree ORDER BY tuple() AS SELECT *, _topic, _partition, _offset FROM test.kafka_{format_name}; """.format( topic_name=topic_name, format_name=format_name, - extra_settings=format_opts.get("extra_settings") or "", + create_query=create_query_generator( + f"kafka_{format_name}", + "id Int64, blockNo UInt16, val1 String, val2 Float32, val3 UInt8", + topic_list=f"{topic_name}", + consumer_group=f"{topic_name}_group", + format=format_name, + settings=extra_settings, + ), ) ) raw_expected = """\ @@ -905,13 +1066,16 @@ def test_kafka_formats(kafka_cluster): for format_name, format_opts in list(all_formats.items()): logging.debug(("Checking {}".format(format_name))) - topic_name = f"format_tests_{format_name}" + topic_name = f"format_tests_{format_name}-{topic_postfix}" # shift offsets by 1 if format supports empty value offsets = ( [1, 2, 3] if format_opts.get("supports_empty_value", False) else [0, 1, 2] ) - result = instance.query( - "SELECT * FROM test.kafka_{format_name}_mv;".format(format_name=format_name) + result = instance.query_with_retry( + "SELECT * FROM test.kafka_{format_name}_mv;".format( + format_name=format_name + ), + check_callback=lambda x: x.count("\n") == raw_expected.count("\n"), ) expected = raw_expected.format( topic_name=topic_name, @@ -922,7 +1086,7 @@ def test_kafka_formats(kafka_cluster): assert TSV(result) == TSV(expected), "Proper result for format: {}".format( format_name ) - kafka_delete_topic(admin_client, topic_name) + kafka_delete_topic(get_admin_client(kafka_cluster), topic_name) # Since everything is async and shaky when receiving messages from Kafka, @@ -997,8 +1161,29 @@ def kafka_cluster(): @pytest.fixture(autouse=True) def kafka_setup_teardown(): - instance.query("DROP DATABASE IF EXISTS test; CREATE DATABASE test;") - # logging.debug("kafka is available - running test") + instance.query("DROP DATABASE IF EXISTS test SYNC; CREATE DATABASE test;") + admin_client = get_admin_client(cluster) + + def get_topics_to_delete(): + return [t for t in admin_client.list_topics() if not t.startswith("_")] + + topics = get_topics_to_delete() + logging.debug(f"Deleting topics: {topics}") + result = admin_client.delete_topics(topics) + for topic, error in result.topic_error_codes: + if error != 0: + logging.warning(f"Received error {error} while deleting topic {topic}") + else: + logging.info(f"Deleted topic {topic}") + + retries = 0 + topics = get_topics_to_delete() + while len(topics) != 0: + logging.info(f"Existing topics: {topics}") + if retries >= 5: + raise Exception(f"Failed to delete topics {topics}") + retries += 1 + time.sleep(0.5) yield # run test @@ -1098,9 +1283,7 @@ def test_kafka_issue4116(kafka_cluster): def test_kafka_consumer_hang(kafka_cluster): - admin_client = KafkaAdminClient( - bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) - ) + admin_client = get_admin_client(kafka_cluster) topic_name = "consumer_hang" kafka_create_topic(admin_client, topic_name, num_partitions=8) @@ -1180,9 +1363,7 @@ def test_kafka_consumer_hang(kafka_cluster): def test_kafka_consumer_hang2(kafka_cluster): - admin_client = KafkaAdminClient( - bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) - ) + admin_client = get_admin_client(kafka_cluster) topic_name = "consumer_hang2" kafka_create_topic(admin_client, topic_name) @@ -1243,9 +1424,7 @@ def test_kafka_consumer_hang2(kafka_cluster): # sequential read from different consumers leads to breaking lot of kafka invariants # (first consumer will get all partitions initially, and may have problems in doing polls every 60 sec) def test_kafka_read_consumers_in_parallel(kafka_cluster): - admin_client = KafkaAdminClient( - bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) - ) + admin_client = get_admin_client(kafka_cluster) topic_name = "read_consumers_in_parallel" kafka_create_topic(admin_client, topic_name, num_partitions=8) @@ -1365,9 +1544,7 @@ def test_kafka_tsv_with_delimiter(kafka_cluster): def test_kafka_select_empty(kafka_cluster): - admin_client = KafkaAdminClient( - bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) - ) + admin_client = get_admin_client(kafka_cluster) topic_name = "empty" kafka_create_topic(admin_client, topic_name) @@ -1545,13 +1722,13 @@ def test_kafka_protobuf_no_delimiter(kafka_cluster): """ ) - kafka_produce_protobuf_messages_no_delimeters( + kafka_produce_protobuf_messages_no_delimiters( kafka_cluster, "pb_no_delimiter", 0, 20 ) - kafka_produce_protobuf_messages_no_delimeters( + kafka_produce_protobuf_messages_no_delimiters( kafka_cluster, "pb_no_delimiter", 20, 1 ) - kafka_produce_protobuf_messages_no_delimeters( + kafka_produce_protobuf_messages_no_delimiters( kafka_cluster, "pb_no_delimiter", 21, 29 ) @@ -1592,18 +1769,20 @@ def test_kafka_protobuf_no_delimiter(kafka_cluster): assert TSV(result) == TSV(expected) -def test_kafka_materialized_view(kafka_cluster): +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_kafka_materialized_view(kafka_cluster, create_query_generator): + topic_name = "mv" + instance.query( - """ + f""" DROP TABLE IF EXISTS test.view; DROP TABLE IF EXISTS test.consumer; - CREATE TABLE test.kafka (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'mv', - kafka_group_name = 'mv', - kafka_format = 'JSONEachRow', - kafka_row_delimiter = '\\n'; + DROP TABLE IF EXISTS test.kafka; + + {create_query_generator("kafka", "key UInt64, value UInt64", topic_list=topic_name, consumer_group="mv")}; CREATE TABLE test.view (key UInt64, value UInt64) ENGINE = MergeTree() ORDER BY key; @@ -1615,110 +1794,124 @@ def test_kafka_materialized_view(kafka_cluster): messages = [] for i in range(50): messages.append(json.dumps({"key": i, "value": i})) - kafka_produce(kafka_cluster, "mv", messages) + kafka_produce(kafka_cluster, topic_name, messages) - while True: - result = instance.query("SELECT * FROM test.view") - if kafka_check_result(result): - break + with existing_kafka_topic(get_admin_client(kafka_cluster), topic_name): + result = instance.query_with_retry( + "SELECT * FROM test.view", check_callback=kafka_check_result + ) - instance.query( + kafka_check_result(result, True) + + instance.query( + """ + DROP TABLE test.consumer; + DROP TABLE test.view; + DROP TABLE test.kafka; """ - DROP TABLE test.consumer; - DROP TABLE test.view; - """ - ) - - kafka_check_result(result, True) + ) -def test_kafka_recreate_kafka_table(kafka_cluster): +@pytest.mark.parametrize( + "create_query_generator, log_line", + [ + ( + generate_new_create_table_query, + r"kafka.*Saved offset [0-9]+ for topic-partition \[recreate_kafka_table:[0-9]+", + ), + ( + generate_old_create_table_query, + "kafka.*Committed offset [0-9]+.*recreate_kafka_table", + ), + ], +) +def test_kafka_recreate_kafka_table(kafka_cluster, create_query_generator, log_line): """ Checks that materialized view work properly after dropping and recreating the Kafka table. """ - # line for backporting: - # admin_client = KafkaAdminClient(bootstrap_servers="localhost:9092") - admin_client = KafkaAdminClient( - bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) - ) - topic_name = "recreate_kafka_table" - kafka_create_topic(admin_client, topic_name, num_partitions=6) + thread_per_consumer = must_use_thread_per_consumer(create_query_generator) - instance.query( + with kafka_topic(get_admin_client(kafka_cluster), topic_name, num_partitions=6): + create_query = create_query_generator( + "kafka", + "key UInt64, value UInt64", + topic_list=topic_name, + consumer_group="recreate_kafka_table_group", + settings={ + "kafka_num_consumers": 4, + "kafka_flush_interval_ms": 1000, + "kafka_skip_broken_messages": 1048577, + "kafka_thread_per_consumer": thread_per_consumer, + }, + ) + + instance.query( + f""" + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.consumer; + {create_query}; + + CREATE TABLE test.view (key UInt64, value UInt64) + ENGINE = MergeTree() + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM test.kafka; """ - DROP TABLE IF EXISTS test.view; - DROP TABLE IF EXISTS test.consumer; - CREATE TABLE test.kafka (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'recreate_kafka_table', - kafka_group_name = 'recreate_kafka_table_group', - kafka_format = 'JSONEachRow', - kafka_num_consumers = 6, - kafka_flush_interval_ms = 1000, - kafka_skip_broken_messages = 1048577; + ) - CREATE TABLE test.view (key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.kafka; - """ - ) + messages = [] + for i in range(120): + messages.append(json.dumps({"key": i, "value": i})) + kafka_produce(kafka_cluster, "recreate_kafka_table", messages) - messages = [] - for i in range(120): - messages.append(json.dumps({"key": i, "value": i})) - kafka_produce(kafka_cluster, "recreate_kafka_table", messages) + instance.wait_for_log_line( + log_line, + repetitions=6, + look_behind_lines=100, + ) - instance.wait_for_log_line( - "kafka.*Committed offset [0-9]+.*recreate_kafka_table", - repetitions=6, - look_behind_lines=100, - ) - - instance.query( + instance.query( + """ + DROP TABLE test.kafka; """ - DROP TABLE test.kafka; - """ - ) + ) - kafka_produce(kafka_cluster, "recreate_kafka_table", messages) + instance.rotate_logs() - instance.query( + kafka_produce(kafka_cluster, "recreate_kafka_table", messages) + + instance.query(create_query) + + instance.wait_for_log_line( + log_line, + repetitions=6, + look_behind_lines=100, + ) + + # data was not flushed yet (it will be flushed 7.5 sec after creating MV) + assert int(instance.query("SELECT count() FROM test.view")) == 240 + + instance.query( + """ + DROP TABLE test.consumer; + DROP TABLE test.kafka; + DROP TABLE test.view; """ - CREATE TABLE test.kafka (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'recreate_kafka_table', - kafka_group_name = 'recreate_kafka_table_group', - kafka_format = 'JSONEachRow', - kafka_num_consumers = 6, - kafka_flush_interval_ms = 1000, - kafka_skip_broken_messages = 1048577; - """ - ) - - instance.wait_for_log_line( - "kafka.*Committed offset [0-9]+.*recreate_kafka_table", - repetitions=6, - look_behind_lines=100, - ) - - # data was not flushed yet (it will be flushed 7.5 sec after creating MV) - assert int(instance.query("SELECT count() FROM test.view")) == 240 - - instance.query( - """ - DROP TABLE test.consumer; - DROP TABLE test.view; - """ - ) - kafka_delete_topic(admin_client, topic_name) + ) -def test_librdkafka_compression(kafka_cluster): +@pytest.mark.parametrize( + "create_query_generator, log_line", + [ + (generate_old_create_table_query, "Committed offset {offset}"), + ( + generate_new_create_table_query, + r"kafka.*Saved offset [0-9]+ for topic-partition \[{topic}:[0-9]+\]", + ), + ], +) +def test_librdkafka_compression(kafka_cluster, create_query_generator, log_line): """ Regression for UB in snappy-c (that is used in librdkafka), backport pr is [1]. @@ -1754,58 +1947,68 @@ def test_librdkafka_compression(kafka_cluster): expected = "\n".join(expected) + admin_client = get_admin_client(kafka_cluster) + for compression_type in supported_compression_types: logging.debug(("Check compression {}".format(compression_type))) topic_name = "test_librdkafka_compression_{}".format(compression_type) - admin_client = KafkaAdminClient( - bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) - ) + topic_config = {"compression.type": compression_type} + with kafka_topic(admin_client, topic_name, config=topic_config): + instance.query( + """{create_query}; - kafka_create_topic( - admin_client, topic_name, config={"compression.type": compression_type} - ) + CREATE TABLE test.view (key UInt64, value String) + ENGINE = MergeTree() + ORDER BY key; - instance.query( - """ - CREATE TABLE test.kafka (key UInt64, value String) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = '{topic_name}', - kafka_group_name = '{topic_name}_group', - kafka_format = 'JSONEachRow', - kafka_flush_interval_ms = 1000; - CREATE MATERIALIZED VIEW test.consumer Engine=Log AS - SELECT * FROM test.kafka; - """.format( - topic_name=topic_name + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM test.kafka; + """.format( + create_query=create_query_generator( + "kafka", + "key UInt64, value String", + topic_list=topic_name, + format="JSONEachRow", + settings={"kafka_flush_interval_ms": 1000}, + ), + ) ) - ) - kafka_produce(kafka_cluster, topic_name, messages) + kafka_produce(kafka_cluster, topic_name, messages) - instance.wait_for_log_line("Committed offset {}".format(number_of_messages)) + instance.wait_for_log_line( + log_line.format(offset=number_of_messages, topic=topic_name) + ) + result = instance.query("SELECT * FROM test.view") + assert TSV(result) == TSV(expected) - result = instance.query("SELECT * FROM test.consumer") - assert TSV(result) == TSV(expected) - - instance.query("DROP TABLE test.kafka SYNC") - instance.query("DROP TABLE test.consumer SYNC") - kafka_delete_topic(admin_client, topic_name) + instance.query("DROP TABLE test.kafka SYNC") + instance.query("DROP TABLE test.consumer SYNC") + instance.query("DROP TABLE test.view SYNC") -def test_kafka_materialized_view_with_subquery(kafka_cluster): +@pytest.mark.parametrize( + "create_query_generator", + [generate_new_create_table_query, generate_old_create_table_query], +) +def test_kafka_materialized_view_with_subquery(kafka_cluster, create_query_generator): + topic_name = "mysq" + logging.debug(f"Using topic {topic_name}") + + create_query = create_query_generator( + "kafka", + "key UInt64, value UInt64", + topic_list=topic_name, + consumer_group=topic_name, + ) instance.query( - """ + f""" + DROP TABLE IF EXISTS test.kafka; DROP TABLE IF EXISTS test.view; DROP TABLE IF EXISTS test.consumer; - CREATE TABLE test.kafka (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'mvsq', - kafka_group_name = 'mvsq', - kafka_format = 'JSONEachRow', - kafka_row_delimiter = '\\n'; + + {create_query}; CREATE TABLE test.view (key UInt64, value UInt64) ENGINE = MergeTree() ORDER BY key; @@ -1817,37 +2020,46 @@ def test_kafka_materialized_view_with_subquery(kafka_cluster): messages = [] for i in range(50): messages.append(json.dumps({"key": i, "value": i})) - kafka_produce(kafka_cluster, "mvsq", messages) + kafka_produce(kafka_cluster, topic_name, messages) - while True: - result = instance.query("SELECT * FROM test.view") - if kafka_check_result(result): - break + with existing_kafka_topic(get_admin_client(kafka_cluster), topic_name): + result = instance.query_with_retry( + "SELECT * FROM test.view", + check_callback=kafka_check_result, + retry_count=40, + sleep_time=0.75, + ) - instance.query( + instance.query( + """ + DROP TABLE test.consumer; + DROP TABLE test.view; """ - DROP TABLE test.consumer; - DROP TABLE test.view; - """ + ) + + kafka_check_result(result, True) + + +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_kafka_many_materialized_views(kafka_cluster, create_query_generator): + topic_name = f"mmv-{get_topic_postfix(create_query_generator)}" + create_query = create_query_generator( + "kafka", + "key UInt64, value UInt64", + topic_list=topic_name, + consumer_group=f"{topic_name}-group", ) - kafka_check_result(result, True) - - -def test_kafka_many_materialized_views(kafka_cluster): instance.query( - """ + f""" DROP TABLE IF EXISTS test.view1; DROP TABLE IF EXISTS test.view2; DROP TABLE IF EXISTS test.consumer1; DROP TABLE IF EXISTS test.consumer2; - CREATE TABLE test.kafka (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'mmv', - kafka_group_name = 'mmv', - kafka_format = 'JSONEachRow', - kafka_row_delimiter = '\\n'; + {create_query}; CREATE TABLE test.view1 (key UInt64, value UInt64) ENGINE = MergeTree() ORDER BY key; @@ -1864,237 +2076,258 @@ def test_kafka_many_materialized_views(kafka_cluster): messages = [] for i in range(50): messages.append(json.dumps({"key": i, "value": i})) - kafka_produce(kafka_cluster, "mmv", messages) + kafka_produce(kafka_cluster, topic_name, messages) - while True: - result1 = instance.query("SELECT * FROM test.view1") - result2 = instance.query("SELECT * FROM test.view2") - if kafka_check_result(result1) and kafka_check_result(result2): - break + with existing_kafka_topic(get_admin_client(kafka_cluster), topic_name): + result1 = instance.query_with_retry( + "SELECT * FROM test.view1", check_callback=kafka_check_result + ) + result2 = instance.query_with_retry( + "SELECT * FROM test.view2", check_callback=kafka_check_result + ) - instance.query( + instance.query( + """ + DROP TABLE test.consumer1; + DROP TABLE test.consumer2; + DROP TABLE test.view1; + DROP TABLE test.view2; """ - DROP TABLE test.consumer1; - DROP TABLE test.consumer2; - DROP TABLE test.view1; - DROP TABLE test.view2; - """ - ) + ) - kafka_check_result(result1, True) - kafka_check_result(result2, True) + kafka_check_result(result1, True) + kafka_check_result(result2, True) -def test_kafka_flush_on_big_message(kafka_cluster): - # Create batchs of messages of size ~100Kb +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_kafka_flush_on_big_message(kafka_cluster, create_query_generator): + # Create batches of messages of size ~100Kb kafka_messages = 1000 batch_messages = 1000 + topic_name = "flush" + get_topic_postfix(create_query_generator) messages = [ json.dumps({"key": i, "value": "x" * 100}) * batch_messages for i in range(kafka_messages) ] - kafka_produce(kafka_cluster, "flush", messages) + kafka_produce(kafka_cluster, topic_name, messages) - instance.query( + admin_client = get_admin_client(kafka_cluster) + + with existing_kafka_topic(admin_client, topic_name): + create_query = create_query_generator( + "kafka", + "key UInt64, value String", + topic_list=topic_name, + consumer_group=topic_name, + settings={"kafka_max_block_size": 10}, + ) + + instance.query( + f""" + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.consumer; + {create_query}; + CREATE TABLE test.view (key UInt64, value String) + ENGINE = MergeTree + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM test.kafka; """ - DROP TABLE IF EXISTS test.view; - DROP TABLE IF EXISTS test.consumer; - CREATE TABLE test.kafka (key UInt64, value String) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'flush', - kafka_group_name = 'flush', - kafka_format = 'JSONEachRow', - kafka_max_block_size = 10; - CREATE TABLE test.view (key UInt64, value String) - ENGINE = MergeTree - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.kafka; - """ - ) + ) - client = KafkaAdminClient( - bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) - ) - received = False - while not received: - try: - offsets = client.list_consumer_group_offsets("flush") - for topic, offset in list(offsets.items()): - if topic.topic == "flush" and offset.offset == kafka_messages: - received = True - break - except kafka.errors.GroupCoordinatorNotAvailableError: - continue + received = False + while not received: + try: + offsets = admin_client.list_consumer_group_offsets(topic_name) + for topic, offset in list(offsets.items()): + if topic.topic == topic_name and offset.offset == kafka_messages: + received = True + break + except kafka.errors.GroupCoordinatorNotAvailableError: + continue - while True: - result = instance.query("SELECT count() FROM test.view") - if int(result) == kafka_messages * batch_messages: - break + while True: + result = instance.query("SELECT count() FROM test.view") + if int(result) == kafka_messages * batch_messages: + break - instance.query( + instance.query( + """ + DROP TABLE test.consumer; + DROP TABLE test.view; """ - DROP TABLE test.consumer; - DROP TABLE test.view; - """ - ) + ) - assert ( - int(result) == kafka_messages * batch_messages - ), "ClickHouse lost some messages: {}".format(result) + assert ( + int(result) == kafka_messages * batch_messages + ), "ClickHouse lost some messages: {}".format(result) def test_kafka_virtual_columns(kafka_cluster): - admin_client = KafkaAdminClient( - bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) - ) topic_config = { # default retention, since predefined timestamp_ms is used. "retention.ms": "-1", } - kafka_create_topic(admin_client, "virt1", config=topic_config) - - instance.query( - """ - CREATE TABLE test.kafka (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'virt1', - kafka_group_name = 'virt1', - kafka_commit_on_select = 1, - kafka_format = 'JSONEachRow'; - """ - ) - - messages = "" - for i in range(25): - messages += json.dumps({"key": i, "value": i}) + "\n" - kafka_produce(kafka_cluster, "virt1", [messages], 0) - - messages = "" - for i in range(25, 50): - messages += json.dumps({"key": i, "value": i}) + "\n" - kafka_produce(kafka_cluster, "virt1", [messages], 0) - - result = "" - while True: - result += instance.query( - """SELECT _key, key, _topic, value, _offset, _partition, _timestamp = 0 ? '0000-00-00 00:00:00' : toString(_timestamp) AS _timestamp FROM test.kafka""", - ignore_error=True, + with kafka_topic(get_admin_client(kafka_cluster), "virt1", config=topic_config): + instance.query( + """ + CREATE TABLE test.kafka (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'virt1', + kafka_group_name = 'virt1', + kafka_commit_on_select = 1, + kafka_format = 'JSONEachRow'; + """ ) - if kafka_check_result(result, False, "test_kafka_virtual1.reference"): - break - kafka_check_result(result, True, "test_kafka_virtual1.reference") + messages = "" + for i in range(25): + messages += json.dumps({"key": i, "value": i}) + "\n" + kafka_produce(kafka_cluster, "virt1", [messages], 0) + + messages = "" + for i in range(25, 50): + messages += json.dumps({"key": i, "value": i}) + "\n" + kafka_produce(kafka_cluster, "virt1", [messages], 0) + + result = "" + while True: + result += instance.query( + """SELECT _key, key, _topic, value, _offset, _partition, _timestamp = 0 ? '0000-00-00 00:00:00' : toString(_timestamp) AS _timestamp FROM test.kafka""", + ignore_error=True, + ) + if kafka_check_result(result, False, "test_kafka_virtual1.reference"): + break + + kafka_check_result(result, True, "test_kafka_virtual1.reference") -def test_kafka_virtual_columns_with_materialized_view(kafka_cluster): - admin_client = KafkaAdminClient( - bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) - ) +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_kafka_virtual_columns_with_materialized_view( + kafka_cluster, create_query_generator +): topic_config = { # default retention, since predefined timestamp_ms is used. "retention.ms": "-1", } - kafka_create_topic(admin_client, "virt2", config=topic_config) - - instance.query( - """ - DROP TABLE IF EXISTS test.view; - DROP TABLE IF EXISTS test.consumer; - CREATE TABLE test.kafka (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'virt2', - kafka_group_name = 'virt2', - kafka_format = 'JSONEachRow', - kafka_row_delimiter = '\\n'; - CREATE TABLE test.view (key UInt64, value UInt64, kafka_key String, topic String, offset UInt64, partition UInt64, timestamp Nullable(DateTime('UTC'))) - ENGINE = MergeTree() - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT *, _key as kafka_key, _topic as topic, _offset as offset, _partition as partition, _timestamp = 0 ? '0000-00-00 00:00:00' : toString(_timestamp) as timestamp FROM test.kafka; - """ + # the topic name is hardcoded in reference, it doesn't worth to create two reference files to have separate topics, + # as the context manager will always clean up the topic + topic_name = "virt2" + create_query = create_query_generator( + "kafka", + "key UInt64, value UInt64", + topic_list=topic_name, + consumer_group=f"{topic_name}-group", ) - - messages = [] - for i in range(50): - messages.append(json.dumps({"key": i, "value": i})) - kafka_produce(kafka_cluster, "virt2", messages, 0) - - sql = "SELECT kafka_key, key, topic, value, offset, partition, timestamp FROM test.view ORDER BY kafka_key, key" - result = instance.query(sql) - iterations = 0 - while ( - not kafka_check_result(result, False, "test_kafka_virtual2.reference") - and iterations < 10 - ): - time.sleep(3) - iterations += 1 - result = instance.query(sql) - - kafka_check_result(result, True, "test_kafka_virtual2.reference") - - instance.query( + with kafka_topic(get_admin_client(kafka_cluster), topic_name, config=topic_config): + instance.query( + f""" + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.consumer; + {create_query}; + CREATE TABLE test.view (key UInt64, value UInt64, kafka_key String, topic String, offset UInt64, partition UInt64, timestamp Nullable(DateTime('UTC'))) + ENGINE = MergeTree() + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT *, _key as kafka_key, _topic as topic, _offset as offset, _partition as partition, _timestamp = 0 ? '0000-00-00 00:00:00' : toString(_timestamp) as timestamp FROM test.kafka; """ - DROP TABLE test.consumer; - DROP TABLE test.view; - """ - ) + ) + messages = [] + for i in range(50): + messages.append(json.dumps({"key": i, "value": i})) + kafka_produce(kafka_cluster, topic_name, messages, 0) -def test_kafka_insert(kafka_cluster): - instance.query( + def check_callback(result): + return kafka_check_result(result, False, "test_kafka_virtual2.reference") + + result = instance.query_with_retry( + "SELECT kafka_key, key, topic, value, offset, partition, timestamp FROM test.view ORDER BY kafka_key, key", + check_callback=check_callback, + ) + + kafka_check_result(result, True, "test_kafka_virtual2.reference") + + instance.query( + """ + DROP TABLE test.consumer; + DROP TABLE test.view; """ - CREATE TABLE test.kafka (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'insert1', - kafka_group_name = 'insert1', - kafka_format = 'TSV', - kafka_commit_on_select = 1, - kafka_row_delimiter = '\\n'; - """ - ) + ) - values = [] - for i in range(50): - values.append("({i}, {i})".format(i=i)) - values = ",".join(values) +def insert_with_retry(instance, values, table_name="kafka", max_try_count=5): + try_count = 0 while True: + logging.debug(f"Inserting, try_count is {try_count}") try: - instance.query("INSERT INTO test.kafka VALUES {}".format(values)) + try_count += 1 + instance.query(f"INSERT INTO test.{table_name} VALUES {values}") break except QueryRuntimeException as e: - if "Local: Timed out." in str(e): + if "Local: Timed out." in str(e) and try_count < max_try_count: continue else: raise - messages = [] - while True: - messages.extend(kafka_consume(kafka_cluster, "insert1")) - if len(messages) == 50: - break - result = "\n".join(messages) - kafka_check_result(result, True) +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_kafka_insert(kafka_cluster, create_query_generator): + topic_name = "insert1" + get_topic_postfix(create_query_generator) - -def test_kafka_produce_consume(kafka_cluster): instance.query( - """ + create_query_generator( + "kafka", + "key UInt64, value UInt64", + topic_list=topic_name, + consumer_group=topic_name, + format="TSV", + ) + ) + + message_count = 50 + values = [] + for i in range(message_count): + values.append("({i}, {i})".format(i=i)) + values = ",".join(values) + + with existing_kafka_topic(get_admin_client(kafka_cluster), topic_name): + insert_with_retry(instance, values) + + messages = kafka_consume_with_retry(kafka_cluster, topic_name, message_count) + result = "\n".join(messages) + kafka_check_result(result, True) + + +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_kafka_produce_consume(kafka_cluster, create_query_generator): + topic_name = "insert2" + get_topic_postfix(create_query_generator) + + create_query = create_query_generator( + "kafka", + "key UInt64, value UInt64", + topic_list=topic_name, + consumer_group=topic_name, + format="TSV", + ) + instance.query( + f""" DROP TABLE IF EXISTS test.view; DROP TABLE IF EXISTS test.consumer; - CREATE TABLE test.kafka (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'insert2', - kafka_group_name = 'insert2', - kafka_format = 'TSV', - kafka_row_delimiter = '\\n'; + {create_query}; CREATE TABLE test.view (key UInt64, value UInt64) ENGINE = MergeTree ORDER BY key; @@ -2111,15 +2344,7 @@ def test_kafka_produce_consume(kafka_cluster): values.append("({i}, {i})".format(i=i)) values = ",".join(values) - while True: - try: - instance.query("INSERT INTO test.kafka VALUES {}".format(values)) - break - except QueryRuntimeException as e: - if "Local: Timed out." in str(e): - continue - else: - raise + insert_with_retry(instance, values) threads = [] threads_num = 16 @@ -2129,40 +2354,48 @@ def test_kafka_produce_consume(kafka_cluster): time.sleep(random.uniform(0, 1)) thread.start() - while True: - result = instance.query("SELECT count() FROM test.view") - time.sleep(1) - if int(result) == messages_num * threads_num: - break + with existing_kafka_topic(get_admin_client(kafka_cluster), topic_name): + expected_row_count = messages_num * threads_num + result = instance.query_with_retry( + "SELECT count() FROM test.view", + sleep_time=1, + retry_count=20, + check_callback=lambda result: int(result) == expected_row_count, + ) - instance.query( + instance.query( + """ + DROP TABLE test.consumer; + DROP TABLE test.view; """ - DROP TABLE test.consumer; - DROP TABLE test.view; - """ + ) + + for thread in threads: + thread.join() + + assert ( + int(result) == expected_row_count + ), "ClickHouse lost some messages: {}".format(result) + + +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_kafka_commit_on_block_write(kafka_cluster, create_query_generator): + topic_name = "block" + get_topic_postfix(create_query_generator) + create_query = create_query_generator( + "kafka", + "key UInt64, value UInt64", + topic_list=topic_name, + consumer_group=topic_name, + settings={"kafka_max_block_size": 100}, ) - - for thread in threads: - thread.join() - - assert ( - int(result) == messages_num * threads_num - ), "ClickHouse lost some messages: {}".format(result) - - -def test_kafka_commit_on_block_write(kafka_cluster): instance.query( - """ + f""" DROP TABLE IF EXISTS test.view; DROP TABLE IF EXISTS test.consumer; - CREATE TABLE test.kafka (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'block', - kafka_group_name = 'block', - kafka_format = 'JSONEachRow', - kafka_max_block_size = 100, - kafka_row_delimiter = '\\n'; + {create_query}; CREATE TABLE test.view (key UInt64, value UInt64) ENGINE = MergeTree() ORDER BY key; @@ -2173,45 +2406,39 @@ def test_kafka_commit_on_block_write(kafka_cluster): cancel = threading.Event() + # We need to pass i as a reference. Simple integers are passed by value. + # Making an array is probably the easiest way to "force pass by reference". i = [0] - def produce(): + def produce(i): while not cancel.is_set(): messages = [] for _ in range(101): messages.append(json.dumps({"key": i[0], "value": i[0]})) i[0] += 1 - kafka_produce(kafka_cluster, "block", messages) + kafka_produce(kafka_cluster, topic_name, messages) - kafka_thread = threading.Thread(target=produce) + kafka_thread = threading.Thread(target=produce, args=[i]) kafka_thread.start() - while int(instance.query("SELECT count() FROM test.view")) == 0: - time.sleep(1) + instance.query_with_retry( + "SELECT count() FROM test.view", + sleep_time=1, + check_callback=lambda res: int(res) >= 100, + ) cancel.set() - instance.query( - """ - DROP TABLE test.kafka; - """ - ) + instance.query("DROP TABLE test.kafka SYNC") - instance.query( - """ - CREATE TABLE test.kafka (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'block', - kafka_group_name = 'block', - kafka_format = 'JSONEachRow', - kafka_max_block_size = 100, - kafka_row_delimiter = '\\n'; - """ - ) + instance.query(create_query) + kafka_thread.join() - while int(instance.query("SELECT uniqExact(key) FROM test.view")) < i[0]: - time.sleep(1) + instance.query_with_retry( + "SELECT uniqExact(key) FROM test.view", + sleep_time=1, + check_callback=lambda res: int(res) >= i[0], + ) result = int(instance.query("SELECT count() == uniqExact(key) FROM test.view")) @@ -2227,163 +2454,183 @@ def test_kafka_commit_on_block_write(kafka_cluster): assert result == 1, "Messages from kafka get duplicated!" -def test_kafka_virtual_columns2(kafka_cluster): - admin_client = KafkaAdminClient( - bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) - ) +@pytest.mark.parametrize( + "create_query_generator, log_line", + [ + (generate_old_create_table_query, "kafka.*Committed offset 2.*virt2_[01]"), + ( + generate_new_create_table_query, + r"kafka.*Saved offset 2 for topic-partition \[virt2_[01]:[0-9]+", + ), + ], +) +def test_kafka_virtual_columns2(kafka_cluster, create_query_generator, log_line): + admin_client = get_admin_client(kafka_cluster) topic_config = { # default retention, since predefined timestamp_ms is used. "retention.ms": "-1", } - kafka_create_topic(admin_client, "virt2_0", num_partitions=2, config=topic_config) - kafka_create_topic(admin_client, "virt2_1", num_partitions=2, config=topic_config) + thread_per_consumer = must_use_thread_per_consumer(create_query_generator) + topic_name_0 = "virt2_0" + topic_name_1 = "virt2_1" + consumer_group = "virt2" + get_topic_postfix(create_query_generator) + with kafka_topic(admin_client, topic_name_0, num_partitions=2, config=topic_config): + with kafka_topic( + admin_client, topic_name_1, num_partitions=2, config=topic_config + ): + create_query = create_query_generator( + "kafka", + "value UInt64", + topic_list=f"{topic_name_0},{topic_name_1}", + consumer_group=consumer_group, + settings={ + "kafka_num_consumers": 2, + "kafka_thread_per_consumer": thread_per_consumer, + }, + ) - instance.query( + instance.query( + f""" + {create_query}; + + CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY tuple() AS + SELECT value, _key, _topic, _partition, _offset, toUnixTimestamp(_timestamp), toUnixTimestamp64Milli(_timestamp_ms), _headers.name, _headers.value FROM test.kafka; + """ + ) + + producer = KafkaProducer( + bootstrap_servers="localhost:{}".format(cluster.kafka_port), + value_serializer=producer_serializer, + key_serializer=producer_serializer, + ) + + producer.send( + topic=topic_name_0, + value=json.dumps({"value": 1}), + partition=0, + key="k1", + timestamp_ms=1577836801001, + headers=[("content-encoding", b"base64")], + ) + producer.send( + topic=topic_name_0, + value=json.dumps({"value": 2}), + partition=0, + key="k2", + timestamp_ms=1577836802002, + headers=[ + ("empty_value", b""), + ("", b"empty name"), + ("", b""), + ("repetition", b"1"), + ("repetition", b"2"), + ], + ) + producer.flush() + + producer.send( + topic=topic_name_0, + value=json.dumps({"value": 3}), + partition=1, + key="k3", + timestamp_ms=1577836803003, + headers=[("b", b"b"), ("a", b"a")], + ) + producer.send( + topic=topic_name_0, + value=json.dumps({"value": 4}), + partition=1, + key="k4", + timestamp_ms=1577836804004, + headers=[("a", b"a"), ("b", b"b")], + ) + producer.flush() + + producer.send( + topic=topic_name_1, + value=json.dumps({"value": 5}), + partition=0, + key="k5", + timestamp_ms=1577836805005, + ) + producer.send( + topic=topic_name_1, + value=json.dumps({"value": 6}), + partition=0, + key="k6", + timestamp_ms=1577836806006, + ) + producer.flush() + + producer.send( + topic=topic_name_1, + value=json.dumps({"value": 7}), + partition=1, + key="k7", + timestamp_ms=1577836807007, + ) + producer.send( + topic=topic_name_1, + value=json.dumps({"value": 8}), + partition=1, + key="k8", + timestamp_ms=1577836808008, + ) + producer.flush() + + instance.wait_for_log_line(log_line, repetitions=4, look_behind_lines=6000) + + members = describe_consumer_group(kafka_cluster, consumer_group) + # pprint.pprint(members) + # members[0]['client_id'] = 'ClickHouse-instance-test-kafka-0' + # members[1]['client_id'] = 'ClickHouse-instance-test-kafka-1' + + result = instance.query( + "SELECT * FROM test.view ORDER BY value", ignore_error=True + ) + + expected = f"""\ + 1 k1 {topic_name_0} 0 0 1577836801 1577836801001 ['content-encoding'] ['base64'] + 2 k2 {topic_name_0} 0 1 1577836802 1577836802002 ['empty_value','','','repetition','repetition'] ['','empty name','','1','2'] + 3 k3 {topic_name_0} 1 0 1577836803 1577836803003 ['b','a'] ['b','a'] + 4 k4 {topic_name_0} 1 1 1577836804 1577836804004 ['a','b'] ['a','b'] + 5 k5 {topic_name_1} 0 0 1577836805 1577836805005 [] [] + 6 k6 {topic_name_1} 0 1 1577836806 1577836806006 [] [] + 7 k7 {topic_name_1} 1 0 1577836807 1577836807007 [] [] + 8 k8 {topic_name_1} 1 1 1577836808 1577836808008 [] [] """ - CREATE TABLE test.kafka (value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'virt2_0,virt2_1', - kafka_group_name = 'virt2', - kafka_num_consumers = 2, - kafka_format = 'JSONEachRow'; - CREATE MATERIALIZED VIEW test.view Engine=Log AS - SELECT value, _key, _topic, _partition, _offset, toUnixTimestamp(_timestamp), toUnixTimestamp64Milli(_timestamp_ms), _headers.name, _headers.value FROM test.kafka; - """ - ) + assert TSV(result) == TSV(expected) - producer = KafkaProducer( - bootstrap_servers="localhost:{}".format(cluster.kafka_port), - value_serializer=producer_serializer, - key_serializer=producer_serializer, - ) + instance.query( + """ + DROP TABLE test.kafka; + DROP TABLE test.view; + """ + ) + instance.rotate_logs() - producer.send( - topic="virt2_0", - value=json.dumps({"value": 1}), - partition=0, - key="k1", - timestamp_ms=1577836801001, - headers=[("content-encoding", b"base64")], - ) - producer.send( - topic="virt2_0", - value=json.dumps({"value": 2}), - partition=0, - key="k2", - timestamp_ms=1577836802002, - headers=[ - ("empty_value", b""), - ("", b"empty name"), - ("", b""), - ("repetition", b"1"), - ("repetition", b"2"), - ], - ) - producer.flush() - producer.send( - topic="virt2_0", - value=json.dumps({"value": 3}), - partition=1, - key="k3", - timestamp_ms=1577836803003, - headers=[("b", b"b"), ("a", b"a")], - ) - producer.send( - topic="virt2_0", - value=json.dumps({"value": 4}), - partition=1, - key="k4", - timestamp_ms=1577836804004, - headers=[("a", b"a"), ("b", b"b")], - ) - producer.flush() - - producer.send( - topic="virt2_1", - value=json.dumps({"value": 5}), - partition=0, - key="k5", - timestamp_ms=1577836805005, - ) - producer.send( - topic="virt2_1", - value=json.dumps({"value": 6}), - partition=0, - key="k6", - timestamp_ms=1577836806006, - ) - producer.flush() - - producer.send( - topic="virt2_1", - value=json.dumps({"value": 7}), - partition=1, - key="k7", - timestamp_ms=1577836807007, - ) - producer.send( - topic="virt2_1", - value=json.dumps({"value": 8}), - partition=1, - key="k8", - timestamp_ms=1577836808008, - ) - producer.flush() - - instance.wait_for_log_line( - "kafka.*Committed offset 2.*virt2_[01]", repetitions=4, look_behind_lines=6000 - ) - - members = describe_consumer_group(kafka_cluster, "virt2") - # pprint.pprint(members) - # members[0]['client_id'] = 'ClickHouse-instance-test-kafka-0' - # members[1]['client_id'] = 'ClickHouse-instance-test-kafka-1' - - result = instance.query("SELECT * FROM test.view ORDER BY value", ignore_error=True) - - expected = """\ -1 k1 virt2_0 0 0 1577836801 1577836801001 ['content-encoding'] ['base64'] -2 k2 virt2_0 0 1 1577836802 1577836802002 ['empty_value','','','repetition','repetition'] ['','empty name','','1','2'] -3 k3 virt2_0 1 0 1577836803 1577836803003 ['b','a'] ['b','a'] -4 k4 virt2_0 1 1 1577836804 1577836804004 ['a','b'] ['a','b'] -5 k5 virt2_1 0 0 1577836805 1577836805005 [] [] -6 k6 virt2_1 0 1 1577836806 1577836806006 [] [] -7 k7 virt2_1 1 0 1577836807 1577836807007 [] [] -8 k8 virt2_1 1 1 1577836808 1577836808008 [] [] -""" - - assert TSV(result) == TSV(expected) - - instance.query( - """ - DROP TABLE test.kafka; - DROP TABLE test.view; - """ - ) - kafka_delete_topic(admin_client, "virt2_0") - kafka_delete_topic(admin_client, "virt2_1") +@pytest.mark.parametrize( + "create_query_generator, do_direct_read", + [(generate_old_create_table_query, True), (generate_new_create_table_query, False)], +) +def test_kafka_producer_consumer_separate_settings( + kafka_cluster, create_query_generator, do_direct_read +): instance.rotate_logs() - - -def test_kafka_producer_consumer_separate_settings(kafka_cluster): instance.query( - """ - DROP TABLE IF EXISTS test.test_kafka; - CREATE TABLE test.test_kafka (key UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'separate_settings', - kafka_group_name = 'test', - kafka_format = 'JSONEachRow', - kafka_row_delimiter = '\\n'; - """ + create_query_generator( + "test_kafka", + "key UInt64", + topic_list="separate_settings", + consumer_group="test", + ) ) - instance.query("SELECT * FROM test.test_kafka") + if do_direct_read: + instance.query("SELECT * FROM test.test_kafka") instance.query("INSERT INTO test.test_kafka VALUES (1)") assert instance.contains_in_log("Kafka producer created") @@ -2398,11 +2645,11 @@ def test_kafka_producer_consumer_separate_settings(kafka_cluster): # and producer configurations assert "heartbeat.interval.ms" in warn - kafka_consumer_applyed_properties = instance.grep_in_log("Consumer set property") - kafka_producer_applyed_properties = instance.grep_in_log("Producer set property") + kafka_consumer_applied_properties = instance.grep_in_log("Consumer set property") + kafka_producer_applied_properties = instance.grep_in_log("Producer set property") - assert kafka_consumer_applyed_properties is not None - assert kafka_producer_applyed_properties is not None + assert kafka_consumer_applied_properties is not None + assert kafka_producer_applied_properties is not None # global settings should be applied for consumer and producer global_settings = { @@ -2412,118 +2659,128 @@ def test_kafka_producer_consumer_separate_settings(kafka_cluster): for name, value in global_settings.items(): property_in_log = f"{name}:{value}" - assert property_in_log in kafka_consumer_applyed_properties - assert property_in_log in kafka_producer_applyed_properties + assert property_in_log in kafka_consumer_applied_properties + assert property_in_log in kafka_producer_applied_properties settings_topic__separate_settings__consumer = {"session.timeout.ms": "6001"} for name, value in settings_topic__separate_settings__consumer.items(): property_in_log = f"{name}:{value}" - assert property_in_log in kafka_consumer_applyed_properties - assert property_in_log not in kafka_producer_applyed_properties + assert property_in_log in kafka_consumer_applied_properties + assert property_in_log not in kafka_producer_applied_properties producer_settings = {"transaction.timeout.ms": "60001"} for name, value in producer_settings.items(): property_in_log = f"{name}:{value}" - assert property_in_log not in kafka_consumer_applyed_properties - assert property_in_log in kafka_producer_applyed_properties + assert property_in_log not in kafka_consumer_applied_properties + assert property_in_log in kafka_producer_applied_properties # Should be ignored, because it is inside producer tag producer_legacy_syntax__topic_separate_settings = {"message.timeout.ms": "300001"} for name, value in producer_legacy_syntax__topic_separate_settings.items(): property_in_log = f"{name}:{value}" - assert property_in_log not in kafka_consumer_applyed_properties - assert property_in_log not in kafka_producer_applyed_properties + assert property_in_log not in kafka_consumer_applied_properties + assert property_in_log not in kafka_producer_applied_properties # Old syntax, applied on consumer and producer legacy_syntax__topic_separated_settings = {"heartbeat.interval.ms": "302"} for name, value in legacy_syntax__topic_separated_settings.items(): property_in_log = f"{name}:{value}" - assert property_in_log in kafka_consumer_applyed_properties - assert property_in_log in kafka_producer_applyed_properties + assert property_in_log in kafka_consumer_applied_properties + assert property_in_log in kafka_producer_applied_properties -def test_kafka_produce_key_timestamp(kafka_cluster): - admin_client = KafkaAdminClient( - bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) - ) - +@pytest.mark.parametrize( + "create_query_generator, log_line", + [ + (generate_new_create_table_query, "Saved offset 5"), + (generate_old_create_table_query, "Committed offset 5"), + ], +) +def test_kafka_produce_key_timestamp(kafka_cluster, create_query_generator, log_line): topic_name = "insert3" topic_config = { # default retention, since predefined timestamp_ms is used. "retention.ms": "-1", } - kafka_create_topic(admin_client, topic_name, config=topic_config) - instance.query( + with kafka_topic(get_admin_client(kafka_cluster), topic_name, config=topic_config): + writer_create_query = create_query_generator( + "kafka_writer", + "key UInt64, value UInt64, _key String, _timestamp DateTime('UTC')", + topic_list=topic_name, + consumer_group=topic_name, + format="TSV", + ) + reader_create_query = create_query_generator( + "kafka", + "key UInt64, value UInt64, inserted_key String, inserted_timestamp DateTime('UTC')", + topic_list=topic_name, + consumer_group=topic_name, + format="TSV", + ) + + instance.query( + f""" + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.consumer; + {writer_create_query}; + {reader_create_query}; + CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY tuple() AS + SELECT key, value, inserted_key, toUnixTimestamp(inserted_timestamp), _key, _topic, _partition, _offset, toUnixTimestamp(_timestamp) FROM test.kafka; """ - DROP TABLE IF EXISTS test.view; - DROP TABLE IF EXISTS test.consumer; - CREATE TABLE test.kafka_writer (key UInt64, value UInt64, _key String, _timestamp DateTime('UTC')) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'insert3', - kafka_group_name = 'insert3', - kafka_format = 'TSV', - kafka_row_delimiter = '\\n'; + ) - CREATE TABLE test.kafka (key UInt64, value UInt64, inserted_key String, inserted_timestamp DateTime('UTC')) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'insert3', - kafka_group_name = 'insert3', - kafka_format = 'TSV', - kafka_row_delimiter = '\\n'; + instance.query( + "INSERT INTO test.kafka_writer VALUES ({},{},'{}',toDateTime({}))".format( + 1, 1, "k1", 1577836801 + ) + ) + instance.query( + "INSERT INTO test.kafka_writer VALUES ({},{},'{}',toDateTime({}))".format( + 2, 2, "k2", 1577836802 + ) + ) + instance.query( + "INSERT INTO test.kafka_writer VALUES ({},{},'{}',toDateTime({})),({},{},'{}',toDateTime({}))".format( + 3, 3, "k3", 1577836803, 4, 4, "k4", 1577836804 + ) + ) + instance.query( + "INSERT INTO test.kafka_writer VALUES ({},{},'{}',toDateTime({}))".format( + 5, 5, "k5", 1577836805 + ) + ) - CREATE MATERIALIZED VIEW test.view Engine=Log AS - SELECT key, value, inserted_key, toUnixTimestamp(inserted_timestamp), _key, _topic, _partition, _offset, toUnixTimestamp(_timestamp) FROM test.kafka; + # instance.wait_for_log_line(log_line) + + expected = """\ + 1 1 k1 1577836801 k1 insert3 0 0 1577836801 + 2 2 k2 1577836802 k2 insert3 0 1 1577836802 + 3 3 k3 1577836803 k3 insert3 0 2 1577836803 + 4 4 k4 1577836804 k4 insert3 0 3 1577836804 + 5 5 k5 1577836805 k5 insert3 0 4 1577836805 """ - ) - instance.query( - "INSERT INTO test.kafka_writer VALUES ({},{},'{}',toDateTime({}))".format( - 1, 1, "k1", 1577836801 + result = instance.query_with_retry( + "SELECT * FROM test.view ORDER BY value", + ignore_error=True, + retry_count=5, + sleep_time=1, + check_callback=lambda res: TSV(res) == TSV(expected), ) - ) - instance.query( - "INSERT INTO test.kafka_writer VALUES ({},{},'{}',toDateTime({}))".format( - 2, 2, "k2", 1577836802 - ) - ) - instance.query( - "INSERT INTO test.kafka_writer VALUES ({},{},'{}',toDateTime({})),({},{},'{}',toDateTime({}))".format( - 3, 3, "k3", 1577836803, 4, 4, "k4", 1577836804 - ) - ) - instance.query( - "INSERT INTO test.kafka_writer VALUES ({},{},'{}',toDateTime({}))".format( - 5, 5, "k5", 1577836805 - ) - ) - instance.wait_for_log_line("Committed offset 5") - - result = instance.query("SELECT * FROM test.view ORDER BY value", ignore_error=True) - - # logging.debug(result) - - expected = """\ -1 1 k1 1577836801 k1 insert3 0 0 1577836801 -2 2 k2 1577836802 k2 insert3 0 1 1577836802 -3 3 k3 1577836803 k3 insert3 0 2 1577836803 -4 4 k4 1577836804 k4 insert3 0 3 1577836804 -5 5 k5 1577836805 k5 insert3 0 4 1577836805 -""" - - assert TSV(result) == TSV(expected) - - kafka_delete_topic(admin_client, topic_name) + assert TSV(result) == TSV(expected) -def test_kafka_insert_avro(kafka_cluster): +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_kafka_insert_avro(kafka_cluster, create_query_generator): admin_client = KafkaAdminClient( bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) ) @@ -2531,296 +2788,327 @@ def test_kafka_insert_avro(kafka_cluster): # default retention, since predefined timestamp_ms is used. "retention.ms": "-1", } - kafka_create_topic(admin_client, "avro1", config=topic_config) - - instance.query( - """ - DROP TABLE IF EXISTS test.kafka; - CREATE TABLE test.kafka (key UInt64, value UInt64, _timestamp DateTime('UTC')) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'avro1', - kafka_group_name = 'avro1', - kafka_commit_on_select = 1, - kafka_format = 'Avro'; - """ - ) - - instance.query( - "INSERT INTO test.kafka select number*10 as key, number*100 as value, 1636505534 as _timestamp from numbers(4) SETTINGS output_format_avro_rows_in_file = 2, output_format_avro_codec = 'deflate'" - ) - - messages = [] - while True: - messages.extend( - kafka_consume( - kafka_cluster, "avro1", needDecode=False, timestamp=1636505534 - ) + topic_name = "avro1" + get_topic_postfix(create_query_generator) + with kafka_topic(admin_client, topic_name, config=topic_config): + create_query = create_query_generator( + "kafka", + "key UInt64, value UInt64, _timestamp DateTime('UTC')", + topic_list=topic_name, + consumer_group=topic_name, + format="Avro", + ) + instance.query( + f""" + DROP TABLE IF EXISTS test.kafka; + {create_query} + """ ) - if len(messages) == 2: - break - result = "" - for a_message in messages: - result += decode_avro(a_message) + "\n" + instance.query( + "INSERT INTO test.kafka select number*10 as key, number*100 as value, 1636505534 as _timestamp from numbers(4) SETTINGS output_format_avro_rows_in_file = 2, output_format_avro_codec = 'deflate'" + ) - expected_result = """{'key': 0, 'value': 0, '_timestamp': 1636505534} + message_count = 2 + messages = kafka_consume_with_retry( + kafka_cluster, + topic_name, + message_count, + need_decode=False, + timestamp=1636505534, + ) + + result = "" + for a_message in messages: + result += decode_avro(a_message) + "\n" + + expected_result = """{'key': 0, 'value': 0, '_timestamp': 1636505534} {'key': 10, 'value': 100, '_timestamp': 1636505534} {'key': 20, 'value': 200, '_timestamp': 1636505534} {'key': 30, 'value': 300, '_timestamp': 1636505534} """ - assert result == expected_result + assert result == expected_result -def test_kafka_produce_consume_avro(kafka_cluster): - admin_client = KafkaAdminClient( - bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) - ) +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_kafka_produce_consume_avro(kafka_cluster, create_query_generator): + topic_name = "insert_avro" + get_topic_postfix(create_query_generator) + with kafka_topic(get_admin_client(kafka_cluster), topic_name): + num_rows = 75 - topic_name = "insert_avro" - kafka_create_topic(admin_client, topic_name) - - num_rows = 75 - - instance.query( - """ - DROP TABLE IF EXISTS test.view; - DROP TABLE IF EXISTS test.kafka; - DROP TABLE IF EXISTS test.kafka_writer; - - CREATE TABLE test.kafka_writer (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'avro', - kafka_group_name = 'avro', - kafka_format = 'Avro'; - - - CREATE TABLE test.kafka (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'avro', - kafka_group_name = 'avro', - kafka_format = 'Avro'; - - CREATE MATERIALIZED VIEW test.view Engine=Log AS - SELECT key, value FROM test.kafka; - """ - ) - - instance.query( - "INSERT INTO test.kafka_writer select number*10 as key, number*100 as value from numbers({num_rows}) SETTINGS output_format_avro_rows_in_file = 7".format( - num_rows=num_rows + writer_create_query = create_query_generator( + "kafka_writer", + "key UInt64, value UInt64", + topic_list=topic_name, + consumer_group=topic_name, + format="Avro", ) - ) - instance.wait_for_log_line( - "Committed offset {offset}".format(offset=math.ceil(num_rows / 7)) - ) + reader_create_query = create_query_generator( + "kafka", + "key UInt64, value UInt64", + topic_list=topic_name, + consumer_group=topic_name, + format="Avro", + ) - expected_num_rows = instance.query( - "SELECT COUNT(1) FROM test.view", ignore_error=True - ) - assert int(expected_num_rows) == num_rows - - expected_max_key = instance.query( - "SELECT max(key) FROM test.view", ignore_error=True - ) - assert int(expected_max_key) == (num_rows - 1) * 10 - - kafka_delete_topic(admin_client, topic_name) - - -def test_kafka_flush_by_time(kafka_cluster): - admin_client = KafkaAdminClient( - bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) - ) - topic_name = "flush_by_time" - kafka_create_topic(admin_client, topic_name) - - instance.query( - """ - DROP TABLE IF EXISTS test.view; - DROP TABLE IF EXISTS test.consumer; - - CREATE TABLE test.kafka (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'flush_by_time', - kafka_group_name = 'flush_by_time', - kafka_format = 'JSONEachRow', - kafka_max_block_size = 100, - kafka_row_delimiter = '\\n'; - - SELECT * FROM test.kafka; - - CREATE TABLE test.view (key UInt64, value UInt64, ts DateTime64(3) MATERIALIZED now64(3)) - ENGINE = MergeTree() - ORDER BY key; - """ - ) - - cancel = threading.Event() - - def produce(): - while not cancel.is_set(): - messages = [] - messages.append(json.dumps({"key": 0, "value": 0})) - kafka_produce(kafka_cluster, "flush_by_time", messages) - time.sleep(0.8) - - kafka_thread = threading.Thread(target=produce) - kafka_thread.start() - - instance.query( - """ - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.kafka; - """ - ) - - time.sleep(18) - - result = instance.query("SELECT uniqExact(ts) = 2, count() >= 15 FROM test.view") - - cancel.set() - kafka_thread.join() - - # kafka_cluster.open_bash_shell('instance') - - instance.query( - """ - DROP TABLE test.consumer; - DROP TABLE test.view; - """ - ) - - assert TSV(result) == TSV("1 1") - kafka_delete_topic(admin_client, topic_name) - - -def test_kafka_flush_by_block_size(kafka_cluster): - cancel = threading.Event() - - def produce(): - while not cancel.is_set(): - messages = [] - messages.append(json.dumps({"key": 0, "value": 0})) - kafka_produce(kafka_cluster, "flush_by_block_size", messages) - - kafka_thread = threading.Thread(target=produce) - kafka_thread.start() - - instance.query( - """ - DROP TABLE IF EXISTS test.view; - DROP TABLE IF EXISTS test.consumer; - - CREATE TABLE test.kafka (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'flush_by_block_size', - kafka_group_name = 'flush_by_block_size', - kafka_format = 'JSONEachRow', - kafka_max_block_size = 100, - kafka_poll_max_batch_size = 1, - kafka_flush_interval_ms = 120000, /* should not flush by time during test */ - kafka_row_delimiter = '\\n'; - - CREATE TABLE test.view (key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.kafka; - """ - ) - - # Wait for Kafka engine to consume this data - while 1 != int( instance.query( - "SELECT count() FROM system.parts WHERE database = 'test' AND table = 'view' AND name = 'all_1_1_0'" + f""" + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.kafka; + DROP TABLE IF EXISTS test.kafka_writer; + + {writer_create_query}; + {reader_create_query}; + + CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY tuple() AS + SELECT key, value FROM test.kafka; + """ ) - ): - time.sleep(0.5) - cancel.set() - kafka_thread.join() + instance.query( + "INSERT INTO test.kafka_writer select number*10 as key, number*100 as value from numbers({num_rows}) SETTINGS output_format_avro_rows_in_file = 7".format( + num_rows=num_rows + ) + ) - # more flushes can happens during test, we need to check only result of first flush (part named all_1_1_0). - result = instance.query("SELECT count() FROM test.view WHERE _part='all_1_1_0'") - # logging.debug(result) + instance.wait_for_log_line( + "Committed offset {offset}".format(offset=math.ceil(num_rows / 7)) + ) - instance.query( - """ - DROP TABLE test.consumer; - DROP TABLE test.view; - """ + expected_num_rows = instance.query( + "SELECT COUNT(1) FROM test.view", ignore_error=True + ) + assert int(expected_num_rows) == num_rows + + expected_max_key = instance.query( + "SELECT max(key) FROM test.view", ignore_error=True + ) + assert int(expected_max_key) == (num_rows - 1) * 10 + + +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_kafka_flush_by_time(kafka_cluster, create_query_generator): + admin_client = KafkaAdminClient( + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) ) + topic_name = "flush_by_time" + get_topic_postfix(create_query_generator) - # 100 = first poll should return 100 messages (and rows) - # not waiting for stream_flush_interval_ms - assert ( - int(result) == 100 - ), "Messages from kafka should be flushed when block of size kafka_max_block_size is formed!" + with kafka_topic(admin_client, topic_name): + create_query = create_query_generator( + "kafka", + "key UInt64, value UInt64", + topic_list=topic_name, + consumer_group=topic_name, + settings={ + "kafka_max_block_size": 100, + }, + ) + instance.query( + f""" + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.consumer; + + {create_query}; + + CREATE TABLE test.view (key UInt64, value UInt64, ts DateTime64(3) MATERIALIZED now64(3)) + ENGINE = MergeTree() + ORDER BY key; + """ + ) + + cancel = threading.Event() + + def produce(): + while not cancel.is_set(): + messages = [json.dumps({"key": 0, "value": 0})] + kafka_produce(kafka_cluster, topic_name, messages) + time.sleep(0.8) + + kafka_thread = threading.Thread(target=produce) + kafka_thread.start() + + instance.query( + """ + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM test.kafka; + """ + ) + + # By default the flush timeout should be 7.5 seconds => 18 seconds should be enough for 2 flushes, but not for 3 + time.sleep(18) + + result = instance.query("SELECT uniqExact(ts), count() >= 15 FROM test.view") + + cancel.set() + kafka_thread.join() + + instance.query( + """ + DROP TABLE test.consumer; + DROP TABLE test.view; + """ + ) + + assert TSV(result) == TSV("2 1") -def test_kafka_lot_of_partitions_partial_commit_of_bulk(kafka_cluster): +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_kafka_flush_by_block_size(kafka_cluster, create_query_generator): + topic_name = "flush_by_block_size" + get_topic_postfix(create_query_generator) + + cancel = threading.Event() + + def produce(): + while not cancel.is_set(): + messages = [] + messages.append(json.dumps({"key": 0, "value": 0})) + kafka_produce(kafka_cluster, topic_name, messages) + + kafka_thread = threading.Thread(target=produce) + + with kafka_topic(get_admin_client(kafka_cluster), topic_name): + kafka_thread.start() + + create_query = create_query_generator( + "kafka", + "key UInt64, value UInt64", + topic_list=topic_name, + consumer_group=topic_name, + settings={ + "kafka_max_block_size": 100, + "kafka_poll_max_batch_size": 1, + "kafka_flush_interval_ms": 120000, + }, + ) + + instance.query( + f""" + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.consumer; + + {create_query}; + + CREATE TABLE test.view (key UInt64, value UInt64) + ENGINE = MergeTree() + ORDER BY key; + + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM test.kafka; + """ + ) + + # Wait for Kafka engine to consume this data + while 1 != int( + instance.query( + "SELECT count() FROM system.parts WHERE database = 'test' AND table = 'view' AND name = 'all_1_1_0'" + ) + ): + time.sleep(0.5) + + cancel.set() + kafka_thread.join() + + # more flushes can happens during test, we need to check only result of first flush (part named all_1_1_0). + result = instance.query("SELECT count() FROM test.view WHERE _part='all_1_1_0'") + # logging.debug(result) + + instance.query( + """ + DROP TABLE test.consumer; + DROP TABLE test.view; + """ + ) + + # 100 = first poll should return 100 messages (and rows) + # not waiting for stream_flush_interval_ms + assert ( + int(result) == 100 + ), "Messages from kafka should be flushed when block of size kafka_max_block_size is formed!" + + +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_kafka_lot_of_partitions_partial_commit_of_bulk( + kafka_cluster, create_query_generator +): admin_client = KafkaAdminClient( bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) ) - topic_name = "topic_with_multiple_partitions2" - kafka_create_topic(admin_client, topic_name, num_partitions=10) - - instance.query( - """ - DROP TABLE IF EXISTS test.view; - DROP TABLE IF EXISTS test.consumer; - CREATE TABLE test.kafka (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'topic_with_multiple_partitions2', - kafka_group_name = 'topic_with_multiple_partitions2', - kafka_format = 'JSONEachRow', - kafka_max_block_size = 211, - kafka_flush_interval_ms = 500; - CREATE TABLE test.view (key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.kafka; - """ + topic_name = "topic_with_multiple_partitions2" + get_topic_postfix( + create_query_generator ) - - messages = [] - count = 0 - for dummy_msg in range(1000): - rows = [] - for dummy_row in range(random.randrange(3, 10)): - count = count + 1 - rows.append(json.dumps({"key": count, "value": count})) - messages.append("\n".join(rows)) - kafka_produce(kafka_cluster, "topic_with_multiple_partitions2", messages) - - instance.wait_for_log_line("kafka.*Stalled", repetitions=5) - - result = instance.query("SELECT count(), uniqExact(key), max(key) FROM test.view") - logging.debug(result) - assert TSV(result) == TSV("{0}\t{0}\t{0}".format(count)) - - instance.query( + with kafka_topic(admin_client, topic_name): + create_query = create_query_generator( + "kafka", + "key UInt64, value UInt64", + topic_list=topic_name, + consumer_group=topic_name, + settings={ + "kafka_max_block_size": 211, + "kafka_flush_interval_ms": 500, + }, + ) + instance.query( + f""" + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.consumer; + {create_query}; + CREATE TABLE test.view (key UInt64, value UInt64) + ENGINE = MergeTree() + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM test.kafka; """ - DROP TABLE test.consumer; - DROP TABLE test.view; - """ - ) - kafka_delete_topic(admin_client, topic_name) + ) + + messages = [] + count = 0 + for dummy_msg in range(1000): + rows = [] + for dummy_row in range(random.randrange(3, 10)): + count = count + 1 + rows.append(json.dumps({"key": count, "value": count})) + messages.append("\n".join(rows)) + kafka_produce(kafka_cluster, topic_name, messages) + + instance.wait_for_log_line("kafka.*Stalled", repetitions=5) + + result = instance.query( + "SELECT count(), uniqExact(key), max(key) FROM test.view" + ) + logging.debug(result) + assert TSV(result) == TSV("{0}\t{0}\t{0}".format(count)) + + instance.query( + """ + DROP TABLE test.consumer; + DROP TABLE test.view; + """ + ) -def test_kafka_rebalance(kafka_cluster): +@pytest.mark.parametrize( + "create_query_generator, log_line", + [ + (generate_old_create_table_query, "{}.*Polled offset [0-9]+"), + (generate_new_create_table_query, "{}.*Saved offset"), + ], +) +def test_kafka_rebalance(kafka_cluster, create_query_generator, log_line): NUMBER_OF_CONSURRENT_CONSUMERS = 11 instance.query( @@ -2841,212 +3129,232 @@ def test_kafka_rebalance(kafka_cluster): """ ) - # kafka_cluster.open_bash_shell('instance') - - # time.sleep(2) - admin_client = KafkaAdminClient( bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) ) - topic_name = "topic_with_multiple_partitions" - kafka_create_topic(admin_client, topic_name, num_partitions=11) + topic_name = "topic_with_multiple_partitions" + get_topic_postfix( + create_query_generator + ) + table_name_prefix = "kafka_consumer" + keeper_path = f"/clickhouse/{{database}}/{table_name_prefix}" + with kafka_topic(admin_client, topic_name, num_partitions=11): + cancel = threading.Event() - cancel = threading.Event() + msg_index = [0] - msg_index = [0] + def produce(): + while not cancel.is_set(): + messages = [] + for _ in range(59): + messages.append( + json.dumps({"key": msg_index[0], "value": msg_index[0]}) + ) + msg_index[0] += 1 + kafka_produce(kafka_cluster, topic_name, messages) - def produce(): - while not cancel.is_set(): - messages = [] - for _ in range(59): - messages.append( - json.dumps({"key": msg_index[0], "value": msg_index[0]}) - ) - msg_index[0] += 1 - kafka_produce(kafka_cluster, "topic_with_multiple_partitions", messages) + kafka_thread = threading.Thread(target=produce) + kafka_thread.start() - kafka_thread = threading.Thread(target=produce) - kafka_thread.start() + for consumer_index in range(NUMBER_OF_CONSURRENT_CONSUMERS): + table_name = f"{table_name_prefix}{consumer_index}" + replica_name = f"r{consumer_index}" + logging.debug(f"Setting up {consumer_index}") - for consumer_index in range(NUMBER_OF_CONSURRENT_CONSUMERS): - table_name = "kafka_consumer{}".format(consumer_index) - logging.debug(("Setting up {}".format(table_name))) - - instance.query( - """ - DROP TABLE IF EXISTS test.{0}; - DROP TABLE IF EXISTS test.{0}_mv; - CREATE TABLE test.{0} (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'topic_with_multiple_partitions', - kafka_group_name = 'rebalance_test_group', - kafka_format = 'JSONEachRow', - kafka_max_block_size = 33, - kafka_flush_interval_ms = 500; - CREATE MATERIALIZED VIEW test.{0}_mv TO test.destination AS - SELECT - key, - value, - _topic, - _key, - _offset, - _partition, - _timestamp, - '{0}' as _consumed_by - FROM test.{0}; - """.format( - table_name + create_query = create_query_generator( + table_name, + "key UInt64, value UInt64", + topic_list=topic_name, + keeper_path=keeper_path, + replica_name=replica_name, + settings={ + "kafka_max_block_size": 33, + "kafka_flush_interval_ms": 500, + }, ) - ) + instance.query( + f""" + DROP TABLE IF EXISTS test.{table_name}; + DROP TABLE IF EXISTS test.{table_name}_mv; + {create_query}; + CREATE MATERIALIZED VIEW test.{table_name}_mv TO test.destination AS + SELECT + key, + value, + _topic, + _key, + _offset, + _partition, + _timestamp, + '{table_name}' as _consumed_by + FROM test.{table_name}; + """ + ) + # kafka_cluster.open_bash_shell('instance') + # Waiting for test.kafka_consumerX to start consume ... + instance.wait_for_log_line(log_line.format(table_name)) + + cancel.set() + + # I leave last one working by intent (to finish consuming after all rebalances) + for consumer_index in range(NUMBER_OF_CONSURRENT_CONSUMERS - 1): + logging.debug(("Dropping test.kafka_consumer{}".format(consumer_index))) + instance.query( + "DROP TABLE IF EXISTS test.kafka_consumer{} SYNC".format(consumer_index) + ) + + # logging.debug(instance.query('SELECT count(), uniqExact(key), max(key) + 1 FROM test.destination')) # kafka_cluster.open_bash_shell('instance') - # Waiting for test.kafka_consumerX to start consume ... - instance.wait_for_log_line( - "kafka_consumer{}.*Polled offset [0-9]+".format(consumer_index) - ) - cancel.set() + while 1: + messages_consumed = int( + instance.query("SELECT uniqExact(key) FROM test.destination") + ) + if messages_consumed >= msg_index[0]: + break + time.sleep(1) + logging.debug( + ( + "Waiting for finishing consuming (have {}, should be {})".format( + messages_consumed, msg_index[0] + ) + ) + ) - # I leave last one working by intent (to finish consuming after all rebalances) - for consumer_index in range(NUMBER_OF_CONSURRENT_CONSUMERS - 1): - logging.debug(("Dropping test.kafka_consumer{}".format(consumer_index))) - instance.query( - "DROP TABLE IF EXISTS test.kafka_consumer{} SYNC".format(consumer_index) - ) - - # logging.debug(instance.query('SELECT count(), uniqExact(key), max(key) + 1 FROM test.destination')) - # kafka_cluster.open_bash_shell('instance') - - while 1: - messages_consumed = int( - instance.query("SELECT uniqExact(key) FROM test.destination") - ) - if messages_consumed >= msg_index[0]: - break - time.sleep(1) logging.debug( ( - "Waiting for finishing consuming (have {}, should be {})".format( - messages_consumed, msg_index[0] + instance.query( + "SELECT count(), uniqExact(key), max(key) + 1 FROM test.destination" ) ) ) - logging.debug( - ( - instance.query( - "SELECT count(), uniqExact(key), max(key) + 1 FROM test.destination" - ) + # Some queries to debug... + # SELECT * FROM test.destination where key in (SELECT key FROM test.destination group by key having count() <> 1) + # select number + 1 as key from numbers(4141) x left join test.destination using (key) where test.destination.key = 0; + # SELECT * FROM test.destination WHERE key between 2360 and 2370 order by key; + # select _partition from test.destination group by _partition having count() <> max(_offset) + 1; + # select toUInt64(0) as _partition, number + 1 as _offset from numbers(400) x left join test.destination using (_partition,_offset) where test.destination.key = 0 order by _offset; + # SELECT * FROM test.destination WHERE _partition = 0 and _offset between 220 and 240 order by _offset; + + # CREATE TABLE test.reference (key UInt64, value UInt64) ENGINE = Kafka SETTINGS kafka_broker_list = 'kafka1:19092', + # kafka_topic_list = 'topic_with_multiple_partitions', + # kafka_group_name = 'rebalance_test_group_reference', + # kafka_format = 'JSONEachRow', + # kafka_max_block_size = 100000; + # + # CREATE MATERIALIZED VIEW test.reference_mv Engine=Log AS + # SELECT key, value, _topic,_key,_offset, _partition, _timestamp, 'reference' as _consumed_by + # FROM test.reference; + # + # select * from test.reference_mv left join test.destination using (key,_topic,_offset,_partition) where test.destination._consumed_by = ''; + + result = int( + instance.query("SELECT count() == uniqExact(key) FROM test.destination") ) - ) - # Some queries to debug... - # SELECT * FROM test.destination where key in (SELECT key FROM test.destination group by key having count() <> 1) - # select number + 1 as key from numbers(4141) x left join test.destination using (key) where test.destination.key = 0; - # SELECT * FROM test.destination WHERE key between 2360 and 2370 order by key; - # select _partition from test.destination group by _partition having count() <> max(_offset) + 1; - # select toUInt64(0) as _partition, number + 1 as _offset from numbers(400) x left join test.destination using (_partition,_offset) where test.destination.key = 0 order by _offset; - # SELECT * FROM test.destination WHERE _partition = 0 and _offset between 220 and 240 order by _offset; + for consumer_index in range(NUMBER_OF_CONSURRENT_CONSUMERS): + logging.debug(("kafka_consumer{}".format(consumer_index))) + table_name = "kafka_consumer{}".format(consumer_index) + instance.query( + """ + DROP TABLE IF EXISTS test.{0}; + DROP TABLE IF EXISTS test.{0}_mv; + """.format( + table_name + ) + ) - # CREATE TABLE test.reference (key UInt64, value UInt64) ENGINE = Kafka SETTINGS kafka_broker_list = 'kafka1:19092', - # kafka_topic_list = 'topic_with_multiple_partitions', - # kafka_group_name = 'rebalance_test_group_reference', - # kafka_format = 'JSONEachRow', - # kafka_max_block_size = 100000; - # - # CREATE MATERIALIZED VIEW test.reference_mv Engine=Log AS - # SELECT key, value, _topic,_key,_offset, _partition, _timestamp, 'reference' as _consumed_by - # FROM test.reference; - # - # select * from test.reference_mv left join test.destination using (key,_topic,_offset,_partition) where test.destination._consumed_by = ''; - - result = int( - instance.query("SELECT count() == uniqExact(key) FROM test.destination") - ) - - for consumer_index in range(NUMBER_OF_CONSURRENT_CONSUMERS): - logging.debug(("kafka_consumer{}".format(consumer_index))) - table_name = "kafka_consumer{}".format(consumer_index) instance.query( """ - DROP TABLE IF EXISTS test.{0}; - DROP TABLE IF EXISTS test.{0}_mv; - """.format( - table_name - ) + DROP TABLE IF EXISTS test.destination; + """ ) - instance.query( + kafka_thread.join() + + assert result == 1, "Messages from kafka get duplicated!" + + +# TODO(antaljanosbenjamin): find another way to make insertion fail +@pytest.mark.parametrize( + "create_query_generator", + [ + generate_old_create_table_query, + # generate_new_create_table_query, + ], +) +def test_kafka_no_holes_when_write_suffix_failed(kafka_cluster, create_query_generator): + admin_client = KafkaAdminClient( + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) + ) + topic_name = "no_holes_when_write_suffix_failed" + get_topic_postfix( + create_query_generator + ) + + with kafka_topic(admin_client, topic_name): + messages = [json.dumps({"key": j + 1, "value": "x" * 300}) for j in range(22)] + kafka_produce(kafka_cluster, topic_name, messages) + + create_query = create_query_generator( + "kafka", + "key UInt64, value String", + topic_list=topic_name, + consumer_group=topic_name, + settings={ + "kafka_max_block_size": 20, + "kafka_flush_interval_ms": 2000, + }, + ) + instance.query( + f""" + DROP TABLE IF EXISTS test.view SYNC; + DROP TABLE IF EXISTS test.consumer; + + {create_query}; + + CREATE TABLE test.view (key UInt64, value String) + ENGINE = ReplicatedMergeTree('/clickhouse/kafkatest/tables/{topic_name}', 'node1') + ORDER BY key; """ - DROP TABLE IF EXISTS test.destination; - """ - ) + ) - kafka_thread.join() + # init PartitionManager (it starts container) earlier + pm = PartitionManager() - assert result == 1, "Messages from kafka get duplicated!" - kafka_delete_topic(admin_client, topic_name) - - -def test_kafka_no_holes_when_write_suffix_failed(kafka_cluster): - messages = [json.dumps({"key": j + 1, "value": "x" * 300}) for j in range(22)] - kafka_produce(kafka_cluster, "no_holes_when_write_suffix_failed", messages) - - instance.query( + instance.query( + """ + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM test.kafka + WHERE NOT sleepEachRow(0.25); """ - DROP TABLE IF EXISTS test.view; - DROP TABLE IF EXISTS test.consumer; + ) - CREATE TABLE test.kafka (key UInt64, value String) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'no_holes_when_write_suffix_failed', - kafka_group_name = 'no_holes_when_write_suffix_failed', - kafka_format = 'JSONEachRow', - kafka_max_block_size = 20, - kafka_flush_interval_ms = 2000; + instance.wait_for_log_line("Polled batch of 20 messages") + # the tricky part here is that disconnect should happen after write prefix, but before write suffix + # we have 0.25 (sleepEachRow) * 20 ( Rows ) = 5 sec window after "Polled batch of 20 messages" + # while materialized view is working to inject zookeeper failure + pm.drop_instance_zk_connections(instance) + instance.wait_for_log_line( + "Error.*(Connection loss|Coordination::Exception).*while pushing to view" + ) + pm.heal_all() + instance.wait_for_log_line("Committed offset 22") - CREATE TABLE test.view (key UInt64, value String) - ENGINE = ReplicatedMergeTree('/clickhouse/kafkatest/tables/no_holes_when_write_suffix_failed', 'node1') - ORDER BY key; - """ - ) + result = instance.query( + "SELECT count(), uniqExact(key), max(key) FROM test.view" + ) + logging.debug(result) - # init PartitionManager (it starts container) earlier - pm = PartitionManager() + # kafka_cluster.open_bash_shell('instance') - instance.query( + instance.query( + """ + DROP TABLE test.consumer; + DROP TABLE test.view SYNC; """ - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.kafka - WHERE NOT sleepEachRow(0.25); - """ - ) + ) - instance.wait_for_log_line("Polled batch of 20 messages") - # the tricky part here is that disconnect should happen after write prefix, but before write suffix - # we have 0.25 (sleepEachRow) * 20 ( Rows ) = 5 sec window after "Polled batch of 20 messages" - # while materialized view is working to inject zookeeper failure - pm.drop_instance_zk_connections(instance) - instance.wait_for_log_line( - "Error.*(Connection loss|Coordination::Exception).*while pushing to view" - ) - pm.heal_all() - instance.wait_for_log_line("Committed offset 22") - - result = instance.query("SELECT count(), uniqExact(key), max(key) FROM test.view") - logging.debug(result) - - # kafka_cluster.open_bash_shell('instance') - - instance.query( - """ - DROP TABLE test.consumer; - DROP TABLE test.view; - """ - ) - - assert TSV(result) == TSV("22\t22\t22") + assert TSV(result) == TSV("22\t22\t22") def test_exception_from_destructor(kafka_cluster): @@ -3092,12 +3400,30 @@ def test_exception_from_destructor(kafka_cluster): assert TSV(instance.query("SELECT 1")) == TSV("1") -def test_commits_of_unprocessed_messages_on_drop(kafka_cluster): +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_commits_of_unprocessed_messages_on_drop(kafka_cluster, create_query_generator): + topic_name = "commits_of_unprocessed_messages_on_drop" + get_topic_postfix( + create_query_generator + ) messages = [json.dumps({"key": j + 1, "value": j + 1}) for j in range(1)] - kafka_produce(kafka_cluster, "commits_of_unprocessed_messages_on_drop", messages) + kafka_produce(kafka_cluster, topic_name, messages) + + create_query = create_query_generator( + "kafka", + "key UInt64, value UInt64", + topic_list=topic_name, + consumer_group=f"{topic_name}_test_group", + settings={ + "kafka_max_block_size": 1000, + "kafka_flush_interval_ms": 1000, + }, + ) instance.query( - """ + f""" DROP TABLE IF EXISTS test.destination SYNC; CREATE TABLE test.destination ( key UInt64, @@ -3112,14 +3438,7 @@ def test_commits_of_unprocessed_messages_on_drop(kafka_cluster): ENGINE = MergeTree() ORDER BY key; - CREATE TABLE test.kafka (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'commits_of_unprocessed_messages_on_drop', - kafka_group_name = 'commits_of_unprocessed_messages_on_drop_test_group', - kafka_format = 'JSONEachRow', - kafka_max_block_size = 1000, - kafka_flush_interval_ms = 1000; + {create_query}; CREATE MATERIALIZED VIEW test.kafka_consumer TO test.destination AS SELECT @@ -3147,9 +3466,7 @@ def test_commits_of_unprocessed_messages_on_drop(kafka_cluster): for _ in range(113): messages.append(json.dumps({"key": i[0], "value": i[0]})) i[0] += 1 - kafka_produce( - kafka_cluster, "commits_of_unprocessed_messages_on_drop", messages - ) + kafka_produce(kafka_cluster, topic_name, messages) time.sleep(0.5) kafka_thread = threading.Thread(target=produce) @@ -3162,18 +3479,17 @@ def test_commits_of_unprocessed_messages_on_drop(kafka_cluster): """ ) - instance.query( - """ - CREATE TABLE test.kafka (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'commits_of_unprocessed_messages_on_drop', - kafka_group_name = 'commits_of_unprocessed_messages_on_drop_test_group', - kafka_format = 'JSONEachRow', - kafka_max_block_size = 10000, - kafka_flush_interval_ms = 1000; - """ + new_create_query = create_query_generator( + "kafka", + "key UInt64, value UInt64", + topic_list=topic_name, + consumer_group=f"{topic_name}_test_group", + settings={ + "kafka_max_block_size": 10000, + "kafka_flush_interval_ms": 1000, + }, ) + instance.query(new_create_query) cancel.set() instance.wait_for_log_line("kafka.*Stalled", repetitions=5) @@ -3197,22 +3513,31 @@ def test_commits_of_unprocessed_messages_on_drop(kafka_cluster): assert TSV(result) == TSV("{0}\t{0}\t{0}".format(i[0] - 1)), "Missing data!" -def test_bad_reschedule(kafka_cluster): +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_bad_reschedule(kafka_cluster, create_query_generator): + topic_name = "test_bad_reschedule" + get_topic_postfix(create_query_generator) + messages = [json.dumps({"key": j + 1, "value": j + 1}) for j in range(20000)] - kafka_produce(kafka_cluster, "test_bad_reschedule", messages) + kafka_produce(kafka_cluster, topic_name, messages) + create_query = create_query_generator( + "kafka", + "key UInt64, value UInt64", + topic_list=topic_name, + consumer_group=topic_name, + settings={ + "kafka_max_block_size": 1000, + "kafka_flush_interval_ms": 1000, + }, + ) instance.query( - """ - CREATE TABLE test.kafka (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'test_bad_reschedule', - kafka_group_name = 'test_bad_reschedule', - kafka_format = 'JSONEachRow', - kafka_max_block_size = 1000, - kafka_flush_interval_ms = 1000; + f""" + {create_query}; - CREATE MATERIALIZED VIEW test.destination Engine=Log AS + CREATE MATERIALIZED VIEW test.destination ENGINE=MergeTree ORDER BY tuple() AS SELECT key, now() as consume_ts, @@ -3308,21 +3633,26 @@ def test_kafka_duplicates_when_commit_failed(kafka_cluster): # if we came to partition end we will repeat polling until reaching kafka_max_block_size or flush_interval -# that behavior is a bit quesionable - we can just take a bigger pauses between polls instead - +# that behavior is a bit questionable - we can just take a bigger pauses between polls instead - # to do more job in a single pass, and give more rest for a thread. # But in cases of some peaky loads in kafka topic the current contract sounds more predictable and # easier to understand, so let's keep it as is for now. # also we can came to eof because we drained librdkafka internal queue too fast -def test_premature_flush_on_eof(kafka_cluster): +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_premature_flush_on_eof(kafka_cluster, create_query_generator): + topic_name = "premature_flush_on_eof" + get_topic_postfix(create_query_generator) + create_query = create_query_generator( + "kafka", + "key UInt64, value UInt64", + topic_list=topic_name, + consumer_group=topic_name, + ) instance.query( - """ - CREATE TABLE test.kafka (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'premature_flush_on_eof', - kafka_group_name = 'premature_flush_on_eof', - kafka_format = 'JSONEachRow'; - SELECT * FROM test.kafka LIMIT 1; + f""" + {create_query}; CREATE TABLE test.destination ( key UInt64, value UInt64, @@ -3338,13 +3668,13 @@ def test_premature_flush_on_eof(kafka_cluster): """ ) - # messages created here will be consumed immedeately after MV creation + # messages created here will be consumed immediately after MV creation # reaching topic EOF. - # But we should not do flush immedeately after reaching EOF, because + # But we should not do flush immediately after reaching EOF, because # next poll can return more data, and we should respect kafka_flush_interval_ms # and try to form bigger block - messages = [json.dumps({"key": j + 1, "value": j + 1}) for j in range(1)] - kafka_produce(kafka_cluster, "premature_flush_on_eof", messages) + messages = [json.dumps({"key": 1, "value": 1})] + kafka_produce(kafka_cluster, topic_name, messages) instance.query( """ @@ -3368,7 +3698,7 @@ def test_premature_flush_on_eof(kafka_cluster): instance.wait_for_log_line("Stalled") # produce more messages after delay - kafka_produce(kafka_cluster, "premature_flush_on_eof", messages) + kafka_produce(kafka_cluster, topic_name, messages) # data was not flushed yet (it will be flushed 7.5 sec after creating MV) assert int(instance.query("SELECT count() FROM test.destination")) == 0 @@ -3389,58 +3719,81 @@ def test_premature_flush_on_eof(kafka_cluster): ) -def test_kafka_unavailable(kafka_cluster): - messages = [json.dumps({"key": j + 1, "value": j + 1}) for j in range(20000)] - kafka_produce(kafka_cluster, "test_bad_reschedule", messages) +@pytest.mark.parametrize( + "create_query_generator, do_direct_read", + [(generate_old_create_table_query, True), (generate_new_create_table_query, False)], +) +def test_kafka_unavailable(kafka_cluster, create_query_generator, do_direct_read): + number_of_messages = 20000 + topic_name = "test_bad_reschedule" + get_topic_postfix(create_query_generator) + messages = [ + json.dumps({"key": j + 1, "value": j + 1}) for j in range(number_of_messages) + ] + kafka_produce(kafka_cluster, topic_name, messages) - kafka_cluster.pause_container("kafka1") + with existing_kafka_topic(get_admin_client(kafka_cluster), topic_name): + kafka_cluster.pause_container("kafka1") - instance.query( + create_query = create_query_generator( + "test_bad_reschedule", + "key UInt64, value UInt64", + topic_list=topic_name, + consumer_group=topic_name, + settings={"kafka_max_block_size": 1000}, + ) + instance.query( + f""" + {create_query}; + + CREATE MATERIALIZED VIEW test.destination_unavailable ENGINE=MergeTree ORDER BY tuple() AS + SELECT + key, + now() as consume_ts, + value, + _topic, + _key, + _offset, + _partition, + _timestamp + FROM test.test_bad_reschedule; """ - CREATE TABLE test.test_bad_reschedule (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'test_bad_reschedule', - kafka_group_name = 'test_bad_reschedule', - kafka_format = 'JSONEachRow', - kafka_commit_on_select = 1, - kafka_max_block_size = 1000; + ) - CREATE MATERIALIZED VIEW test.destination_unavailable Engine=Log AS - SELECT - key, - now() as consume_ts, - value, - _topic, - _key, - _offset, - _partition, - _timestamp - FROM test.test_bad_reschedule; - """ - ) + if do_direct_read: + instance.query("SELECT * FROM test.test_bad_reschedule") + instance.query("SELECT count() FROM test.destination_unavailable") - instance.query("SELECT * FROM test.test_bad_reschedule") - instance.query("SELECT count() FROM test.destination_unavailable") + # enough to trigger issue + time.sleep(30) + kafka_cluster.unpause_container("kafka1") - # enough to trigger issue - time.sleep(30) - kafka_cluster.unpause_container("kafka1") + result = instance.query_with_retry( + "SELECT count() FROM test.destination_unavailable", + sleep_time=1, + check_callback=lambda res: int(res) == number_of_messages, + ) - while ( - int(instance.query("SELECT count() FROM test.destination_unavailable")) < 20000 - ): - print("Waiting for consume") - time.sleep(1) + assert int(result) == number_of_messages -def test_kafka_issue14202(kafka_cluster): +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_kafka_issue14202(kafka_cluster, create_query_generator): """ INSERT INTO Kafka Engine from an empty SELECT sub query was leading to failure """ + topic_name = "issue14202" + get_topic_postfix(create_query_generator) + create_query = create_query_generator( + "kafka_q", + "t UInt64, some_string String", + topic_list=topic_name, + consumer_group=topic_name, + ) instance.query( - """ + f""" CREATE TABLE test.empty_table ( dt Date, some_string String @@ -3449,12 +3802,7 @@ def test_kafka_issue14202(kafka_cluster): PARTITION BY toYYYYMM(dt) ORDER BY some_string; - CREATE TABLE test.kafka_q (t UInt64, `some_string` String) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'issue14202', - kafka_group_name = 'issue14202', - kafka_format = 'JSONEachRow'; + {create_query}; """ ) @@ -3505,20 +3853,30 @@ def random_string(size=8): return "".join(random.choices(string.ascii_uppercase + string.digits, k=size)) -def test_kafka_engine_put_errors_to_stream(kafka_cluster): +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_kafka_engine_put_errors_to_stream(kafka_cluster, create_query_generator): + topic_name = "kafka_engine_put_errors_to_stream" + get_topic_postfix( + create_query_generator + ) + create_query = create_query_generator( + "kafka", + "i Int64, s String", + topic_list=topic_name, + consumer_group=topic_name, + settings={ + "kafka_max_block_size": 128, + "kafka_handle_error_mode": "stream", + }, + ) instance.query( - """ + f""" DROP TABLE IF EXISTS test.kafka; DROP TABLE IF EXISTS test.kafka_data; DROP TABLE IF EXISTS test.kafka_errors; - CREATE TABLE test.kafka (i Int64, s String) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'kafka_engine_put_errors_to_stream', - kafka_group_name = 'kafka_engine_put_errors_to_stream', - kafka_format = 'JSONEachRow', - kafka_max_block_size = 128, - kafka_handle_error_mode = 'stream'; + {create_query}; CREATE MATERIALIZED VIEW test.kafka_data (i Int64, s String) ENGINE = MergeTree ORDER BY i @@ -3546,19 +3904,20 @@ def test_kafka_engine_put_errors_to_stream(kafka_cluster): json.dumps({"i": "n_" + random_string(4), "s": random_string(8)}) ) - kafka_produce(kafka_cluster, "kafka_engine_put_errors_to_stream", messages) - instance.wait_for_log_line("Committed offset 128") + kafka_produce(kafka_cluster, topic_name, messages) + with existing_kafka_topic(get_admin_client(kafka_cluster), topic_name): + instance.wait_for_log_line("Committed offset 128") - assert TSV(instance.query("SELECT count() FROM test.kafka_data")) == TSV("64") - assert TSV(instance.query("SELECT count() FROM test.kafka_errors")) == TSV("64") + assert TSV(instance.query("SELECT count() FROM test.kafka_data")) == TSV("64") + assert TSV(instance.query("SELECT count() FROM test.kafka_errors")) == TSV("64") - instance.query( + instance.query( + """ + DROP TABLE test.kafka; + DROP TABLE test.kafka_data; + DROP TABLE test.kafka_errors; """ - DROP TABLE test.kafka; - DROP TABLE test.kafka_data; - DROP TABLE test.kafka_errors; - """ - ) + ) def gen_normal_json(): @@ -3587,21 +3946,35 @@ def gen_message_with_jsons(jsons=10, malformed=0): return s.getvalue() -def test_kafka_engine_put_errors_to_stream_with_random_malformed_json(kafka_cluster): +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_kafka_engine_put_errors_to_stream_with_random_malformed_json( + kafka_cluster, create_query_generator +): + topic_name = ( + "kafka_engine_put_errors_to_stream_with_random_malformed_json" + + get_topic_postfix(create_query_generator) + ) + create_query = create_query_generator( + "kafka", + "i Int64, s String", + topic_list=topic_name, + consumer_group=topic_name, + settings={ + "kafka_max_block_size": 100, + "kafka_poll_max_batch_size": 1, + "kafka_handle_error_mode": "stream", + }, + ) + instance.query( - """ + f""" DROP TABLE IF EXISTS test.kafka; DROP TABLE IF EXISTS test.kafka_data; DROP TABLE IF EXISTS test.kafka_errors; - CREATE TABLE test.kafka (i Int64, s String) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'kafka_engine_put_errors_to_stream_with_random_malformed_json', - kafka_group_name = 'kafka_engine_put_errors_to_stream_with_random_malformed_json', - kafka_format = 'JSONEachRow', - kafka_max_block_size = 100, - kafka_poll_max_batch_size = 1, - kafka_handle_error_mode = 'stream'; + {create_query}; CREATE MATERIALIZED VIEW test.kafka_data (i Int64, s String) ENGINE = MergeTree ORDER BY i @@ -3626,28 +3999,28 @@ def test_kafka_engine_put_errors_to_stream_with_random_malformed_json(kafka_clus else: messages.append(gen_message_with_jsons(10, 0)) - kafka_produce( - kafka_cluster, - "kafka_engine_put_errors_to_stream_with_random_malformed_json", - messages, - ) + kafka_produce(kafka_cluster, topic_name, messages) + with existing_kafka_topic(get_admin_client(kafka_cluster), topic_name): + instance.wait_for_log_line("Committed offset 128") + # 64 good messages, each containing 10 rows + assert TSV(instance.query("SELECT count() FROM test.kafka_data")) == TSV("640") + # 64 bad messages, each containing some broken row + assert TSV(instance.query("SELECT count() FROM test.kafka_errors")) == TSV("64") - instance.wait_for_log_line("Committed offset 128") - # 64 good messages, each containing 10 rows - assert TSV(instance.query("SELECT count() FROM test.kafka_data")) == TSV("640") - # 64 bad messages, each containing some broken row - assert TSV(instance.query("SELECT count() FROM test.kafka_errors")) == TSV("64") - - instance.query( + instance.query( + """ + DROP TABLE test.kafka; + DROP TABLE test.kafka_data; + DROP TABLE test.kafka_errors; """ - DROP TABLE test.kafka; - DROP TABLE test.kafka_data; - DROP TABLE test.kafka_errors; - """ - ) + ) -def test_kafka_formats_with_broken_message(kafka_cluster): +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_kafka_formats_with_broken_message(kafka_cluster, create_query_generator): # data was dumped from clickhouse itself in a following manner # clickhouse-client --format=Native --query='SELECT toInt64(number) as id, toUInt16( intDiv( id, 65536 ) ) as blockNo, reinterpretAsString(19777) as val1, toFloat32(0.5) as val2, toUInt8(1) as val3 from numbers(100) ORDER BY id' | xxd -ps | tr -d '\n' | sed 's/\(..\)/\\x\1/g' admin_client = KafkaAdminClient( @@ -3665,7 +4038,10 @@ def test_kafka_formats_with_broken_message(kafka_cluster): # broken message '{"id":"0","blockNo":"BAD","val1":"AM","val2":0.5,"val3":1}', ], - "expected": """{"raw_message":"{\\"id\\":\\"0\\",\\"blockNo\\":\\"BAD\\",\\"val1\\":\\"AM\\",\\"val2\\":0.5,\\"val3\\":1}","error":"Cannot parse input: expected '\\"' before: 'BAD\\",\\"val1\\":\\"AM\\",\\"val2\\":0.5,\\"val3\\":1}': (while reading the value of key blockNo)"}""", + "expected": { + "raw_message": '{"id":"0","blockNo":"BAD","val1":"AM","val2":0.5,"val3":1}', + "error": 'Cannot parse input: expected \'"\' before: \'BAD","val1":"AM","val2":0.5,"val3":1}\': (while reading the value of key blockNo)', + }, "supports_empty_value": True, "printable": True, }, @@ -3678,7 +4054,10 @@ def test_kafka_formats_with_broken_message(kafka_cluster): # broken message '["0", "BAD", "AM", 0.5, 1]', ], - "expected": """{"raw_message":"[\\"0\\", \\"BAD\\", \\"AM\\", 0.5, 1]","error":"Cannot parse input: expected '\\"' before: 'BAD\\", \\"AM\\", 0.5, 1]': (while reading the value of key blockNo)"}""", + "expected": { + "raw_message": '["0", "BAD", "AM", 0.5, 1]', + "error": "Cannot parse input: expected '\"' before: 'BAD\", \"AM\", 0.5, 1]': (while reading the value of key blockNo)", + }, "supports_empty_value": True, "printable": True, }, @@ -3690,7 +4069,10 @@ def test_kafka_formats_with_broken_message(kafka_cluster): # broken message '["0", "BAD", "AM", 0.5, 1]', ], - "expected": """{"raw_message":"[\\"0\\", \\"BAD\\", \\"AM\\", 0.5, 1]","error":"Cannot parse JSON string: expected opening quote"}""", + "expected": { + "raw_message": '["0", "BAD", "AM", 0.5, 1]', + "error": "Cannot parse JSON string: expected opening quote", + }, "printable": True, }, "TSKV": { @@ -3701,7 +4083,10 @@ def test_kafka_formats_with_broken_message(kafka_cluster): # broken message "id=0\tblockNo=BAD\tval1=AM\tval2=0.5\tval3=1\n", ], - "expected": '{"raw_message":"id=0\\tblockNo=BAD\\tval1=AM\\tval2=0.5\\tval3=1\\n","error":"Found garbage after field in TSKV format: blockNo: (at row 1)\\n"}', + "expected": { + "raw_message": "id=0\tblockNo=BAD\tval1=AM\tval2=0.5\tval3=1\n", + "error": "Found garbage after field in TSKV format: blockNo: (at row 1)\n", + }, "printable": True, }, "CSV": { @@ -3712,7 +4097,10 @@ def test_kafka_formats_with_broken_message(kafka_cluster): # broken message '0,"BAD","AM",0.5,1\n', ], - "expected": """{"raw_message":"0,\\"BAD\\",\\"AM\\",0.5,1\\n","error":"Cannot parse input: expected '\\"' before: 'BAD\\",\\"AM\\",0.5,1\\\\n'"}""", + "expected": { + "raw_message": '0,"BAD","AM",0.5,1\n', + "error": "Cannot parse input: expected '\"' before: 'BAD\",\"AM\",0.5,1\\n'", + }, "printable": True, "supports_empty_value": True, }, @@ -3724,7 +4112,10 @@ def test_kafka_formats_with_broken_message(kafka_cluster): # broken message "0\tBAD\tAM\t0.5\t1\n", ], - "expected": """{"raw_message":"0\\tBAD\\tAM\\t0.5\\t1\\n","error":"Cannot parse input: expected '\\\\t' before: 'BAD\\\\tAM\\\\t0.5\\\\t1\\\\n'"}""", + "expected": { + "raw_message": "0\tBAD\tAM\t0.5\t1\n", + "error": "Cannot parse input: expected '\\t' before: 'BAD\\tAM\\t0.5\\t1\\n'", + }, "supports_empty_value": True, "printable": True, }, @@ -3736,7 +4127,10 @@ def test_kafka_formats_with_broken_message(kafka_cluster): # broken message '"id","blockNo","val1","val2","val3"\n0,"BAD","AM",0.5,1\n', ], - "expected": """{"raw_message":"\\"id\\",\\"blockNo\\",\\"val1\\",\\"val2\\",\\"val3\\"\\n0,\\"BAD\\",\\"AM\\",0.5,1\\n","error":"Cannot parse input: expected '\\"' before: 'BAD\\",\\"AM\\",0.5,1\\\\n'"}""", + "expected": { + "raw_message": '"id","blockNo","val1","val2","val3"\n0,"BAD","AM",0.5,1\n', + "error": "Cannot parse input: expected '\"' before: 'BAD\",\"AM\",0.5,1\\n'", + }, "printable": True, }, "Values": { @@ -3747,7 +4141,10 @@ def test_kafka_formats_with_broken_message(kafka_cluster): # broken message "(0,'BAD','AM',0.5,1)", ], - "expected": r"""{"raw_message":"(0,'BAD','AM',0.5,1)","error":"Cannot parse string 'BAD' as UInt16: syntax error at begin of string. Note: there are toUInt16OrZero and toUInt16OrNull functions, which returns zero\/NULL instead of throwing exception"}""", + "expected": { + "raw_message": "(0,'BAD','AM',0.5,1)", + "error": "Cannot parse string 'BAD' as UInt16: syntax error at begin of string. Note: there are toUInt16OrZero and toUInt16OrNull functions, which returns zero/NULL instead of throwing exception", + }, "supports_empty_value": True, "printable": True, }, @@ -3759,7 +4156,10 @@ def test_kafka_formats_with_broken_message(kafka_cluster): # broken message "id\tblockNo\tval1\tval2\tval3\n0\tBAD\tAM\t0.5\t1\n", ], - "expected": """{"raw_message":"id\\tblockNo\\tval1\\tval2\\tval3\\n0\\tBAD\\tAM\\t0.5\\t1\\n","error":"Cannot parse input: expected '\\\\t' before: 'BAD\\\\tAM\\\\t0.5\\\\t1\\\\n"}""", + "expected": { + "raw_message": "id\tblockNo\tval1\tval2\tval3\n0\tBAD\tAM\t0.5\t1\n", + "error": "Cannot parse input: expected '\\t' before: 'BAD\\tAM\\t0.5\\t1\\n", + }, "supports_empty_value": True, "printable": True, }, @@ -3771,7 +4171,10 @@ def test_kafka_formats_with_broken_message(kafka_cluster): # broken message "id\tblockNo\tval1\tval2\tval3\nInt64\tUInt16\tString\tFloat32\tUInt8\n0\tBAD\tAM\t0.5\t1\n", ], - "expected": """{"raw_message":"id\\tblockNo\\tval1\\tval2\\tval3\\nInt64\\tUInt16\\tString\\tFloat32\\tUInt8\\n0\\tBAD\\tAM\\t0.5\\t1\\n","error":"Cannot parse input: expected '\\\\t' before: 'BAD\\\\tAM\\\\t0.5\\\\t1\\\\n'"}""", + "expected": { + "raw_message": "id\tblockNo\tval1\tval2\tval3\nInt64\tUInt16\tString\tFloat32\tUInt8\n0\tBAD\tAM\t0.5\t1\n", + "error": "Cannot parse input: expected '\\t' before: 'BAD\\tAM\\t0.5\\t1\\n'", + }, "printable": True, }, "Native": { @@ -3782,7 +4185,10 @@ def test_kafka_formats_with_broken_message(kafka_cluster): # broken message b"\x05\x01\x02\x69\x64\x05\x49\x6e\x74\x36\x34\x00\x00\x00\x00\x00\x00\x00\x00\x07\x62\x6c\x6f\x63\x6b\x4e\x6f\x06\x53\x74\x72\x69\x6e\x67\x03\x42\x41\x44\x04\x76\x61\x6c\x31\x06\x53\x74\x72\x69\x6e\x67\x02\x41\x4d\x04\x76\x61\x6c\x32\x07\x46\x6c\x6f\x61\x74\x33\x32\x00\x00\x00\x3f\x04\x76\x61\x6c\x33\x05\x55\x49\x6e\x74\x38\x01", ], - "expected": """{"raw_message":"050102696405496E743634000000000000000007626C6F636B4E6F06537472696E67034241440476616C3106537472696E6702414D0476616C3207466C6F617433320000003F0476616C330555496E743801","error":"Cannot convert: String to UInt16"}""", + "expected": { + "raw_message": "050102696405496E743634000000000000000007626C6F636B4E6F06537472696E67034241440476616C3106537472696E6702414D0476616C3207466C6F617433320000003F0476616C330555496E743801", + "error": "Cannot convert: String to UInt16", + }, "printable": False, }, "RowBinary": { @@ -3793,7 +4199,10 @@ def test_kafka_formats_with_broken_message(kafka_cluster): # broken message b"\x00\x00\x00\x00\x00\x00\x00\x00\x03\x42\x41\x44\x02\x41\x4d\x00\x00\x00\x3f\x01", ], - "expected": '{"raw_message":"00000000000000000342414402414D0000003F01","error":"Cannot read all data. Bytes read: 9. Bytes expected: 65.: (at row 1)\\n"}', + "expected": { + "raw_message": "00000000000000000342414402414D0000003F01", + "error": "Cannot read all data. Bytes read: 9. Bytes expected: 65.: (at row 1)\n", + }, "printable": False, }, "RowBinaryWithNamesAndTypes": { @@ -3804,7 +4213,10 @@ def test_kafka_formats_with_broken_message(kafka_cluster): # broken message b"\x05\x02\x69\x64\x07\x62\x6c\x6f\x63\x6b\x4e\x6f\x04\x76\x61\x6c\x31\x04\x76\x61\x6c\x32\x04\x76\x61\x6c\x33\x05\x49\x6e\x74\x36\x34\x06\x53\x74\x72\x69\x6e\x67\x06\x53\x74\x72\x69\x6e\x67\x07\x46\x6c\x6f\x61\x74\x33\x32\x05\x55\x49\x6e\x74\x38\x00\x00\x00\x00\x00\x00\x00\x00\x03\x42\x41\x44\x02\x41\x4d\x00\x00\x00\x3f\x01", ], - "expected": '{"raw_message":"0502696407626C6F636B4E6F0476616C310476616C320476616C3305496E74363406537472696E6706537472696E6707466C6F617433320555496E743800000000000000000342414402414D0000003F01","error":"Type of \'blockNo\' must be UInt16, not String"}', + "expected": { + "raw_message": "0502696407626C6F636B4E6F0476616C310476616C320476616C3305496E74363406537472696E6706537472696E6707466C6F617433320555496E743800000000000000000342414402414D0000003F01", + "error": "Type of 'blockNo' must be UInt16, not String", + }, "printable": False, }, "ORC": { @@ -3815,15 +4227,19 @@ def test_kafka_formats_with_broken_message(kafka_cluster): # broken message b"\x4f\x52\x43\x0a\x0b\x0a\x03\x00\x00\x00\x12\x04\x08\x01\x50\x00\x0a\x15\x0a\x05\x00\x00\x00\x00\x00\x12\x0c\x08\x01\x12\x06\x08\x00\x10\x00\x18\x00\x50\x00\x0a\x12\x0a\x06\x00\x00\x00\x00\x00\x00\x12\x08\x08\x01\x42\x02\x08\x06\x50\x00\x0a\x12\x0a\x06\x00\x00\x00\x00\x00\x00\x12\x08\x08\x01\x42\x02\x08\x04\x50\x00\x0a\x29\x0a\x04\x00\x00\x00\x00\x12\x21\x08\x01\x1a\x1b\x09\x00\x00\x00\x00\x00\x00\xe0\x3f\x11\x00\x00\x00\x00\x00\x00\xe0\x3f\x19\x00\x00\x00\x00\x00\x00\xe0\x3f\x50\x00\x0a\x15\x0a\x05\x00\x00\x00\x00\x00\x12\x0c\x08\x01\x12\x06\x08\x02\x10\x02\x18\x02\x50\x00\xff\x80\xff\x80\xff\x00\xff\x80\xff\x03\x42\x41\x44\xff\x80\xff\x02\x41\x4d\xff\x80\x00\x00\x00\x3f\xff\x80\xff\x01\x0a\x06\x08\x06\x10\x00\x18\x0d\x0a\x06\x08\x06\x10\x01\x18\x17\x0a\x06\x08\x06\x10\x02\x18\x14\x0a\x06\x08\x06\x10\x03\x18\x14\x0a\x06\x08\x06\x10\x04\x18\x2b\x0a\x06\x08\x06\x10\x05\x18\x17\x0a\x06\x08\x00\x10\x00\x18\x02\x0a\x06\x08\x00\x10\x01\x18\x02\x0a\x06\x08\x01\x10\x01\x18\x02\x0a\x06\x08\x00\x10\x02\x18\x02\x0a\x06\x08\x02\x10\x02\x18\x02\x0a\x06\x08\x01\x10\x02\x18\x03\x0a\x06\x08\x00\x10\x03\x18\x02\x0a\x06\x08\x02\x10\x03\x18\x02\x0a\x06\x08\x01\x10\x03\x18\x02\x0a\x06\x08\x00\x10\x04\x18\x02\x0a\x06\x08\x01\x10\x04\x18\x04\x0a\x06\x08\x00\x10\x05\x18\x02\x0a\x06\x08\x01\x10\x05\x18\x02\x12\x04\x08\x00\x10\x00\x12\x04\x08\x00\x10\x00\x12\x04\x08\x00\x10\x00\x12\x04\x08\x00\x10\x00\x12\x04\x08\x00\x10\x00\x12\x04\x08\x00\x10\x00\x1a\x03\x47\x4d\x54\x0a\x59\x0a\x04\x08\x01\x50\x00\x0a\x0c\x08\x01\x12\x06\x08\x00\x10\x00\x18\x00\x50\x00\x0a\x08\x08\x01\x42\x02\x08\x06\x50\x00\x0a\x08\x08\x01\x42\x02\x08\x04\x50\x00\x0a\x21\x08\x01\x1a\x1b\x09\x00\x00\x00\x00\x00\x00\xe0\x3f\x11\x00\x00\x00\x00\x00\x00\xe0\x3f\x19\x00\x00\x00\x00\x00\x00\xe0\x3f\x50\x00\x0a\x0c\x08\x01\x12\x06\x08\x02\x10\x02\x18\x02\x50\x00\x08\x03\x10\xec\x02\x1a\x0c\x08\x03\x10\x8e\x01\x18\x1d\x20\xc1\x01\x28\x01\x22\x2e\x08\x0c\x12\x05\x01\x02\x03\x04\x05\x1a\x02\x69\x64\x1a\x07\x62\x6c\x6f\x63\x6b\x4e\x6f\x1a\x04\x76\x61\x6c\x31\x1a\x04\x76\x61\x6c\x32\x1a\x04\x76\x61\x6c\x33\x20\x00\x28\x00\x30\x00\x22\x08\x08\x04\x20\x00\x28\x00\x30\x00\x22\x08\x08\x08\x20\x00\x28\x00\x30\x00\x22\x08\x08\x08\x20\x00\x28\x00\x30\x00\x22\x08\x08\x05\x20\x00\x28\x00\x30\x00\x22\x08\x08\x01\x20\x00\x28\x00\x30\x00\x30\x01\x3a\x04\x08\x01\x50\x00\x3a\x0c\x08\x01\x12\x06\x08\x00\x10\x00\x18\x00\x50\x00\x3a\x08\x08\x01\x42\x02\x08\x06\x50\x00\x3a\x08\x08\x01\x42\x02\x08\x04\x50\x00\x3a\x21\x08\x01\x1a\x1b\x09\x00\x00\x00\x00\x00\x00\xe0\x3f\x11\x00\x00\x00\x00\x00\x00\xe0\x3f\x19\x00\x00\x00\x00\x00\x00\xe0\x3f\x50\x00\x3a\x0c\x08\x01\x12\x06\x08\x02\x10\x02\x18\x02\x50\x00\x40\x90\x4e\x48\x01\x08\xd5\x01\x10\x00\x18\x80\x80\x04\x22\x02\x00\x0b\x28\x5b\x30\x06\x82\xf4\x03\x03\x4f\x52\x43\x18", ], - "expected": r"""{"raw_message":"4F52430A0B0A030000001204080150000A150A050000000000120C0801120608001000180050000A120A06000000000000120808014202080650000A120A06000000000000120808014202080450000A290A0400000000122108011A1B09000000000000E03F11000000000000E03F19000000000000E03F50000A150A050000000000120C080112060802100218025000FF80FF80FF00FF80FF03424144FF80FF02414DFF800000003FFF80FF010A0608061000180D0A060806100118170A060806100218140A060806100318140A0608061004182B0A060806100518170A060800100018020A060800100118020A060801100118020A060800100218020A060802100218020A060801100218030A060800100318020A060802100318020A060801100318020A060800100418020A060801100418040A060800100518020A060801100518021204080010001204080010001204080010001204080010001204080010001204080010001A03474D540A590A04080150000A0C0801120608001000180050000A0808014202080650000A0808014202080450000A2108011A1B09000000000000E03F11000000000000E03F19000000000000E03F50000A0C080112060802100218025000080310EC021A0C0803108E01181D20C1012801222E080C120501020304051A0269641A07626C6F636B4E6F1A0476616C311A0476616C321A0476616C33200028003000220808042000280030002208080820002800300022080808200028003000220808052000280030002208080120002800300030013A04080150003A0C0801120608001000180050003A0808014202080650003A0808014202080450003A2108011A1B09000000000000E03F11000000000000E03F19000000000000E03F50003A0C08011206080210021802500040904E480108D5011000188080042202000B285B300682F403034F524318","error":"Cannot parse string 'BAD' as UInt16: syntax error at begin of string. Note: there are toUInt16OrZero and toUInt16OrNull functions, which returns zero\/NULL instead of throwing exception."}""", + "expected": { + "raw_message": "4F52430A0B0A030000001204080150000A150A050000000000120C0801120608001000180050000A120A06000000000000120808014202080650000A120A06000000000000120808014202080450000A290A0400000000122108011A1B09000000000000E03F11000000000000E03F19000000000000E03F50000A150A050000000000120C080112060802100218025000FF80FF80FF00FF80FF03424144FF80FF02414DFF800000003FFF80FF010A0608061000180D0A060806100118170A060806100218140A060806100318140A0608061004182B0A060806100518170A060800100018020A060800100118020A060801100118020A060800100218020A060802100218020A060801100218030A060800100318020A060802100318020A060801100318020A060800100418020A060801100418040A060800100518020A060801100518021204080010001204080010001204080010001204080010001204080010001204080010001A03474D540A590A04080150000A0C0801120608001000180050000A0808014202080650000A0808014202080450000A2108011A1B09000000000000E03F11000000000000E03F19000000000000E03F50000A0C080112060802100218025000080310EC021A0C0803108E01181D20C1012801222E080C120501020304051A0269641A07626C6F636B4E6F1A0476616C311A0476616C321A0476616C33200028003000220808042000280030002208080820002800300022080808200028003000220808052000280030002208080120002800300030013A04080150003A0C0801120608001000180050003A0808014202080650003A0808014202080450003A2108011A1B09000000000000E03F11000000000000E03F19000000000000E03F50003A0C08011206080210021802500040904E480108D5011000188080042202000B285B300682F403034F524318", + "error": "Cannot parse string 'BAD' as UInt16: syntax error at begin of string. Note: there are toUInt16OrZero and toUInt16OrNull functions, which returns zero/NULL instead of throwing exception.", + }, "printable": False, }, } topic_name_prefix = "format_tests_4_stream_" + topic_name_postfix = get_topic_postfix(create_query_generator) for format_name, format_opts in list(all_formats.items()): logging.debug(f"Set up {format_name}") - topic_name = f"{topic_name_prefix}{format_name}" + topic_name = f"{topic_name_prefix}{format_name}{topic_name_postfix}" data_sample = format_opts["data_sample"] data_prefix = [] raw_message = "_raw_message" @@ -3833,39 +4249,33 @@ def test_kafka_formats_with_broken_message(kafka_cluster): if format_opts.get("printable", False) == False: raw_message = "hex(_raw_message)" kafka_produce(kafka_cluster, topic_name, data_prefix + data_sample) + create_query = create_query_generator( + f"kafka_{format_name}", + "id Int64, blockNo UInt16, val1 String, val2 Float32, val3 UInt8", + topic_list=topic_name, + consumer_group=topic_name, + format=format_name, + settings={ + "kafka_handle_error_mode": "stream", + "kafka_flush_interval_ms": 1000, + }, + ) instance.query( - """ + f""" DROP TABLE IF EXISTS test.kafka_{format_name}; - CREATE TABLE test.kafka_{format_name} ( - id Int64, - blockNo UInt16, - val1 String, - val2 Float32, - val3 UInt8 - ) ENGINE = Kafka() - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = '{topic_name}', - kafka_group_name = '{topic_name}', - kafka_format = '{format_name}', - kafka_handle_error_mode = 'stream', - kafka_flush_interval_ms = 1000 {extra_settings}; + {create_query}; DROP TABLE IF EXISTS test.kafka_data_{format_name}_mv; - CREATE MATERIALIZED VIEW test.kafka_data_{format_name}_mv Engine=Log AS + CREATE MATERIALIZED VIEW test.kafka_data_{format_name}_mv ENGINE=MergeTree ORDER BY tuple() AS SELECT *, _topic, _partition, _offset FROM test.kafka_{format_name} WHERE length(_error) = 0; DROP TABLE IF EXISTS test.kafka_errors_{format_name}_mv; - CREATE MATERIALIZED VIEW test.kafka_errors_{format_name}_mv Engine=Log AS + CREATE MATERIALIZED VIEW test.kafka_errors_{format_name}_mv ENGINE=MergeTree ORDER BY tuple() AS SELECT {raw_message} as raw_message, _error as error, _topic as topic, _partition as partition, _offset as offset FROM test.kafka_{format_name} WHERE length(_error) > 0; - """.format( - topic_name=topic_name, - format_name=format_name, - raw_message=raw_message, - extra_settings=format_opts.get("extra_settings") or "", - ) + """ ) raw_expected = """\ @@ -3900,7 +4310,7 @@ def test_kafka_formats_with_broken_message(kafka_cluster): for format_name, format_opts in list(all_formats.items()): logging.debug(f"Checking {format_name}") - topic_name = f"{topic_name_prefix}{format_name}" + topic_name = f"{topic_name_prefix}{format_name}{topic_name_postfix}" # shift offsets by 1 if format supports empty value offsets = ( [1, 2, 3] if format_opts.get("supports_empty_value", False) else [0, 1, 2] @@ -3920,229 +4330,222 @@ def test_kafka_formats_with_broken_message(kafka_cluster): assert TSV(result) == TSV(expected), "Proper result for format: {}".format( format_name ) - errors_result = ast.literal_eval( + errors_result = json.loads( instance.query( "SELECT raw_message, error FROM test.kafka_errors_{format_name}_mv format JSONEachRow".format( format_name=format_name ) ) ) - errors_expected = ast.literal_eval(format_opts["expected"]) # print(errors_result.strip()) # print(errors_expected.strip()) assert ( - errors_result["raw_message"] == errors_expected["raw_message"] + errors_result["raw_message"] == format_opts["expected"]["raw_message"] ), "Proper raw_message for format: {}".format(format_name) # Errors text can change, just checking prefixes assert ( - errors_expected["error"] in errors_result["error"] + format_opts["expected"]["error"] in errors_result["error"] ), "Proper error for format: {}".format(format_name) kafka_delete_topic(admin_client, topic_name) -def wait_for_new_data(table_name, prev_count=0, max_retries=120): - retries = 0 - while True: - new_count = int(instance.query("SELECT count() FROM {}".format(table_name))) - print(new_count) - if new_count > prev_count: - return new_count - else: - retries += 1 - time.sleep(0.5) - if retries > max_retries: - raise Exception("No new data :(") +@pytest.mark.parametrize( + "create_query_generator", + [ + generate_old_create_table_query, + generate_new_create_table_query, + ], +) +def test_kafka_consumer_failover(kafka_cluster, create_query_generator): + topic_name = "kafka_consumer_failover" + get_topic_postfix(create_query_generator) + with kafka_topic(get_admin_client(kafka_cluster), topic_name, num_partitions=2): + consumer_group = f"{topic_name}_group" + create_queries = [] + for counter in range(3): + create_queries.append( + create_query_generator( + f"kafka{counter+1}", + "key UInt64, value UInt64", + topic_list=topic_name, + consumer_group=consumer_group, + settings={ + "kafka_max_block_size": 1, + "kafka_poll_timeout_ms": 200, + }, + ) + ) -def test_kafka_consumer_failover(kafka_cluster): - # for backporting: - # admin_client = KafkaAdminClient(bootstrap_servers="localhost:9092") - admin_client = KafkaAdminClient( - bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) - ) + instance.query( + f""" + {create_queries[0]}; + {create_queries[1]}; + {create_queries[2]}; - topic_name = "kafka_consumer_failover" - kafka_create_topic(admin_client, topic_name, num_partitions=2) + CREATE TABLE test.destination ( + key UInt64, + value UInt64, + _consumed_by LowCardinality(String) + ) + ENGINE = MergeTree() + ORDER BY key; - instance.query( - """ - DROP TABLE IF EXISTS test.kafka; - DROP TABLE IF EXISTS test.kafka2; + CREATE MATERIALIZED VIEW test.kafka1_mv TO test.destination AS + SELECT key, value, 'kafka1' as _consumed_by + FROM test.kafka1; - CREATE TABLE test.kafka (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'kafka_consumer_failover', - kafka_group_name = 'kafka_consumer_failover_group', - kafka_format = 'JSONEachRow', - kafka_max_block_size = 1, - kafka_poll_timeout_ms = 200; + CREATE MATERIALIZED VIEW test.kafka2_mv TO test.destination AS + SELECT key, value, 'kafka2' as _consumed_by + FROM test.kafka2; - CREATE TABLE test.kafka2 (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'kafka_consumer_failover', - kafka_group_name = 'kafka_consumer_failover_group', - kafka_format = 'JSONEachRow', - kafka_max_block_size = 1, - kafka_poll_timeout_ms = 200; - - CREATE TABLE test.kafka3 (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'kafka_consumer_failover', - kafka_group_name = 'kafka_consumer_failover_group', - kafka_format = 'JSONEachRow', - kafka_max_block_size = 1, - kafka_poll_timeout_ms = 200; - - CREATE TABLE test.destination ( - key UInt64, - value UInt64, - _consumed_by LowCardinality(String) + CREATE MATERIALIZED VIEW test.kafka3_mv TO test.destination AS + SELECT key, value, 'kafka3' as _consumed_by + FROM test.kafka3; + """ ) - ENGINE = MergeTree() - ORDER BY key; - CREATE MATERIALIZED VIEW test.kafka_mv TO test.destination AS - SELECT key, value, 'kafka' as _consumed_by - FROM test.kafka; + producer = KafkaProducer( + bootstrap_servers="localhost:{}".format(cluster.kafka_port), + value_serializer=producer_serializer, + key_serializer=producer_serializer, + ) - CREATE MATERIALIZED VIEW test.kafka2_mv TO test.destination AS - SELECT key, value, 'kafka2' as _consumed_by - FROM test.kafka2; + ## all 3 attached, 2 working + producer.send( + topic=topic_name, + value=json.dumps({"key": 1, "value": 1}), + partition=0, + ) + producer.send( + topic=topic_name, + value=json.dumps({"key": 1, "value": 1}), + partition=1, + ) + producer.flush() - CREATE MATERIALIZED VIEW test.kafka3_mv TO test.destination AS - SELECT key, value, 'kafka3' as _consumed_by - FROM test.kafka3; - """ - ) + count_query = "SELECT count() FROM test.destination" + prev_count = instance.query_with_retry( + count_query, check_callback=lambda res: int(res) > 0 + ) - producer = KafkaProducer( - bootstrap_servers="localhost:{}".format(cluster.kafka_port), - value_serializer=producer_serializer, - key_serializer=producer_serializer, - ) + ## 2 attached, 2 working + instance.query("DETACH TABLE test.kafka1") + producer.send( + topic=topic_name, + value=json.dumps({"key": 2, "value": 2}), + partition=0, + ) + producer.send( + topic=topic_name, + value=json.dumps({"key": 2, "value": 2}), + partition=1, + ) + producer.flush() + prev_count = instance.query_with_retry( + count_query, check_callback=lambda res: int(res) > prev_count + ) - ## all 3 attached, 2 working - producer.send( - topic="kafka_consumer_failover", - value=json.dumps({"key": 1, "value": 1}), - partition=0, - ) - producer.send( - topic="kafka_consumer_failover", - value=json.dumps({"key": 1, "value": 1}), - partition=1, - ) - producer.flush() - prev_count = wait_for_new_data("test.destination") + ## 1 attached, 1 working + instance.query("DETACH TABLE test.kafka2") + producer.send( + topic=topic_name, + value=json.dumps({"key": 3, "value": 3}), + partition=0, + ) + producer.send( + topic=topic_name, + value=json.dumps({"key": 3, "value": 3}), + partition=1, + ) + producer.flush() + prev_count = instance.query_with_retry( + count_query, check_callback=lambda res: int(res) > prev_count + ) - ## 2 attached, 2 working - instance.query("DETACH TABLE test.kafka") - producer.send( - topic="kafka_consumer_failover", - value=json.dumps({"key": 2, "value": 2}), - partition=0, - ) - producer.send( - topic="kafka_consumer_failover", - value=json.dumps({"key": 2, "value": 2}), - partition=1, - ) - producer.flush() - prev_count = wait_for_new_data("test.destination", prev_count) + ## 2 attached, 2 working + instance.query("ATTACH TABLE test.kafka1") + producer.send( + topic=topic_name, + value=json.dumps({"key": 4, "value": 4}), + partition=0, + ) + producer.send( + topic=topic_name, + value=json.dumps({"key": 4, "value": 4}), + partition=1, + ) + producer.flush() + prev_count = instance.query_with_retry( + count_query, check_callback=lambda res: int(res) > prev_count + ) - ## 1 attached, 1 working - instance.query("DETACH TABLE test.kafka2") - producer.send( - topic="kafka_consumer_failover", - value=json.dumps({"key": 3, "value": 3}), - partition=0, - ) - producer.send( - topic="kafka_consumer_failover", - value=json.dumps({"key": 3, "value": 3}), - partition=1, - ) - producer.flush() - prev_count = wait_for_new_data("test.destination", prev_count) + ## 1 attached, 1 working + instance.query("DETACH TABLE test.kafka3") + producer.send( + topic=topic_name, + value=json.dumps({"key": 5, "value": 5}), + partition=0, + ) + producer.send( + topic=topic_name, + value=json.dumps({"key": 5, "value": 5}), + partition=1, + ) + producer.flush() + prev_count = instance.query_with_retry( + count_query, check_callback=lambda res: int(res) > prev_count + ) - ## 2 attached, 2 working - instance.query("ATTACH TABLE test.kafka") - producer.send( - topic="kafka_consumer_failover", - value=json.dumps({"key": 4, "value": 4}), - partition=0, - ) - producer.send( - topic="kafka_consumer_failover", - value=json.dumps({"key": 4, "value": 4}), - partition=1, - ) - producer.flush() - prev_count = wait_for_new_data("test.destination", prev_count) + ## 2 attached, 2 working + instance.query("ATTACH TABLE test.kafka2") + producer.send( + topic=topic_name, + value=json.dumps({"key": 6, "value": 6}), + partition=0, + ) + producer.send( + topic=topic_name, + value=json.dumps({"key": 6, "value": 6}), + partition=1, + ) + producer.flush() + prev_count = instance.query_with_retry( + count_query, check_callback=lambda res: int(res) > prev_count + ) - ## 1 attached, 1 working - instance.query("DETACH TABLE test.kafka3") - producer.send( - topic="kafka_consumer_failover", - value=json.dumps({"key": 5, "value": 5}), - partition=0, - ) - producer.send( - topic="kafka_consumer_failover", - value=json.dumps({"key": 5, "value": 5}), - partition=1, - ) - producer.flush() - prev_count = wait_for_new_data("test.destination", prev_count) + ## 3 attached, 2 working + instance.query("ATTACH TABLE test.kafka3") + producer.send( + topic=topic_name, + value=json.dumps({"key": 7, "value": 7}), + partition=0, + ) + producer.send( + topic=topic_name, + value=json.dumps({"key": 7, "value": 7}), + partition=1, + ) + producer.flush() + prev_count = instance.query_with_retry( + count_query, check_callback=lambda res: int(res) > prev_count + ) - ## 2 attached, 2 working - instance.query("ATTACH TABLE test.kafka2") - producer.send( - topic="kafka_consumer_failover", - value=json.dumps({"key": 6, "value": 6}), - partition=0, - ) - producer.send( - topic="kafka_consumer_failover", - value=json.dumps({"key": 6, "value": 6}), - partition=1, - ) - producer.flush() - prev_count = wait_for_new_data("test.destination", prev_count) - - ## 3 attached, 2 working - instance.query("ATTACH TABLE test.kafka3") - producer.send( - topic="kafka_consumer_failover", - value=json.dumps({"key": 7, "value": 7}), - partition=0, - ) - producer.send( - topic="kafka_consumer_failover", - value=json.dumps({"key": 7, "value": 7}), - partition=1, - ) - producer.flush() - prev_count = wait_for_new_data("test.destination", prev_count) - - ## 2 attached, same 2 working - instance.query("DETACH TABLE test.kafka3") - producer.send( - topic="kafka_consumer_failover", - value=json.dumps({"key": 8, "value": 8}), - partition=0, - ) - producer.send( - topic="kafka_consumer_failover", - value=json.dumps({"key": 8, "value": 8}), - partition=1, - ) - producer.flush() - prev_count = wait_for_new_data("test.destination", prev_count) - kafka_delete_topic(admin_client, topic_name) + ## 2 attached, same 2 working + instance.query("DETACH TABLE test.kafka3") + producer.send( + topic=topic_name, + value=json.dumps({"key": 8, "value": 8}), + partition=0, + ) + producer.send( + topic=topic_name, + value=json.dumps({"key": 8, "value": 8}), + partition=1, + ) + producer.flush() + prev_count = instance.query_with_retry( + count_query, check_callback=lambda res: int(res) > prev_count + ) def test_kafka_predefined_configuration(kafka_cluster): @@ -4172,269 +4575,261 @@ def test_kafka_predefined_configuration(kafka_cluster): # https://github.com/ClickHouse/ClickHouse/issues/26643 -def test_issue26643(kafka_cluster): - # for backporting: - # admin_client = KafkaAdminClient(bootstrap_servers="localhost:9092") - admin_client = KafkaAdminClient( - bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) - ) +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_issue26643(kafka_cluster, create_query_generator): producer = KafkaProducer( bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port), value_serializer=producer_serializer, ) + topic_name = "test_issue26643" + get_topic_postfix(create_query_generator) + thread_per_consumer = must_use_thread_per_consumer(create_query_generator) - topic_list = [] - topic_list.append( - NewTopic(name="test_issue26643", num_partitions=4, replication_factor=1) - ) - admin_client.create_topics(new_topics=topic_list, validate_only=False) - - msg = message_with_repeated_pb2.Message( - tnow=1629000000, - server="server1", - clien="host1", - sPort=443, - cPort=50000, - r=[ - message_with_repeated_pb2.dd( - name="1", type=444, ttl=123123, data=b"adsfasd" - ), - message_with_repeated_pb2.dd(name="2"), - ], - method="GET", - ) - - data = b"" - serialized_msg = msg.SerializeToString() - data = data + _VarintBytes(len(serialized_msg)) + serialized_msg - - msg = message_with_repeated_pb2.Message(tnow=1629000002) - - serialized_msg = msg.SerializeToString() - data = data + _VarintBytes(len(serialized_msg)) + serialized_msg - - producer.send(topic="test_issue26643", value=data) - - data = _VarintBytes(len(serialized_msg)) + serialized_msg - producer.send(topic="test_issue26643", value=data) - producer.flush() - - instance.query( - """ - CREATE TABLE IF NOT EXISTS test.test_queue - ( - `tnow` UInt32, - `server` String, - `client` String, - `sPort` UInt16, - `cPort` UInt16, - `r.name` Array(String), - `r.class` Array(UInt16), - `r.type` Array(UInt16), - `r.ttl` Array(UInt32), - `r.data` Array(String), - `method` String + with kafka_topic(get_admin_client(kafka_cluster), topic_name): + msg = message_with_repeated_pb2.Message( + tnow=1629000000, + server="server1", + clien="host1", + sPort=443, + cPort=50000, + r=[ + message_with_repeated_pb2.dd( + name="1", type=444, ttl=123123, data=b"adsfasd" + ), + message_with_repeated_pb2.dd(name="2"), + ], + method="GET", ) - ENGINE = Kafka - SETTINGS - kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'test_issue26643', - kafka_group_name = 'test_issue26643_group', - kafka_format = 'Protobuf', - kafka_schema = 'message_with_repeated.proto:Message', - kafka_num_consumers = 4, - kafka_skip_broken_messages = 10000; - SET allow_suspicious_low_cardinality_types=1; + data = b"" + serialized_msg = msg.SerializeToString() + data = data + _VarintBytes(len(serialized_msg)) + serialized_msg - CREATE TABLE test.log - ( - `tnow` DateTime('Asia/Istanbul') CODEC(DoubleDelta, LZ4), - `server` LowCardinality(String), - `client` LowCardinality(String), - `sPort` LowCardinality(UInt16), - `cPort` UInt16 CODEC(T64, LZ4), - `r.name` Array(String), - `r.class` Array(LowCardinality(UInt16)), - `r.type` Array(LowCardinality(UInt16)), - `r.ttl` Array(LowCardinality(UInt32)), - `r.data` Array(String), - `method` LowCardinality(String) + msg = message_with_repeated_pb2.Message(tnow=1629000002) + + serialized_msg = msg.SerializeToString() + data = data + _VarintBytes(len(serialized_msg)) + serialized_msg + + producer.send(topic_name, value=data) + + data = _VarintBytes(len(serialized_msg)) + serialized_msg + producer.send(topic_name, value=data) + producer.flush() + + create_query = create_query_generator( + "test_queue", + """`tnow` UInt32, + `server` String, + `client` String, + `sPort` UInt16, + `cPort` UInt16, + `r.name` Array(String), + `r.class` Array(UInt16), + `r.type` Array(UInt16), + `r.ttl` Array(UInt32), + `r.data` Array(String), + `method` String""", + topic_list=topic_name, + consumer_group=f"{topic_name}_group", + format="Protobuf", + settings={ + "kafka_schema": "message_with_repeated.proto:Message", + "kafka_skip_broken_messages": 10000, + "kafka_thread_per_consumer": thread_per_consumer, + }, ) - ENGINE = MergeTree - PARTITION BY toYYYYMMDD(tnow) - ORDER BY (tnow, server) - TTL toDate(tnow) + toIntervalMonth(1000) - SETTINGS index_granularity = 16384, merge_with_ttl_timeout = 7200; - CREATE MATERIALIZED VIEW test.test_consumer TO test.log AS - SELECT - toDateTime(a.tnow) AS tnow, - a.server AS server, - a.client AS client, - a.sPort AS sPort, - a.cPort AS cPort, - a.`r.name` AS `r.name`, - a.`r.class` AS `r.class`, - a.`r.type` AS `r.type`, - a.`r.ttl` AS `r.ttl`, - a.`r.data` AS `r.data`, - a.method AS method - FROM test.test_queue AS a; - """ - ) + instance.query( + f""" + {create_query}; - instance.wait_for_log_line("Committed offset") - result = instance.query("SELECT * FROM test.log") + SET allow_suspicious_low_cardinality_types=1; - expected = """\ -2021-08-15 07:00:00 server1 443 50000 ['1','2'] [0,0] [444,0] [123123,0] ['adsfasd',''] GET -2021-08-15 07:00:02 0 0 [] [] [] [] [] -2021-08-15 07:00:02 0 0 [] [] [] [] [] -""" - assert TSV(result) == TSV(expected) + CREATE TABLE test.log + ( + `tnow` DateTime('Asia/Istanbul') CODEC(DoubleDelta, LZ4), + `server` LowCardinality(String), + `client` LowCardinality(String), + `sPort` LowCardinality(UInt16), + `cPort` UInt16 CODEC(T64, LZ4), + `r.name` Array(String), + `r.class` Array(LowCardinality(UInt16)), + `r.type` Array(LowCardinality(UInt16)), + `r.ttl` Array(LowCardinality(UInt32)), + `r.data` Array(String), + `method` LowCardinality(String) + ) + ENGINE = MergeTree + PARTITION BY toYYYYMMDD(tnow) + ORDER BY (tnow, server) + TTL toDate(tnow) + toIntervalMonth(1000) + SETTINGS index_granularity = 16384, merge_with_ttl_timeout = 7200; - # kafka_cluster.open_bash_shell('instance') + CREATE MATERIALIZED VIEW test.test_consumer TO test.log AS + SELECT + toDateTime(a.tnow) AS tnow, + a.server AS server, + a.client AS client, + a.sPort AS sPort, + a.cPort AS cPort, + a.`r.name` AS `r.name`, + a.`r.class` AS `r.class`, + a.`r.type` AS `r.type`, + a.`r.ttl` AS `r.ttl`, + a.`r.data` AS `r.data`, + a.method AS method + FROM test.test_queue AS a; + """ + ) + + instance.wait_for_log_line("Committed offset") + result = instance.query("SELECT * FROM test.log") + + expected = """\ + 2021-08-15 07:00:00 server1 443 50000 ['1','2'] [0,0] [444,0] [123123,0] ['adsfasd',''] GET + 2021-08-15 07:00:02 0 0 [] [] [] [] [] + 2021-08-15 07:00:02 0 0 [] [] [] [] [] + """ + assert TSV(result) == TSV(expected) -def test_num_consumers_limit(kafka_cluster): +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_num_consumers_limit(kafka_cluster, create_query_generator): instance.query("DROP TABLE IF EXISTS test.kafka") - error = instance.query_and_get_error( - """ - CREATE TABLE test.kafka (key UInt64, value UInt64) - ENGINE = Kafka('{kafka_broker}:19092', '{kafka_topic_old}', '{kafka_group_name_old}', '{kafka_format_json_each_row}', '\\n', '', 100) - SETTINGS kafka_commit_on_select = 1; - """ + thread_per_consumer = must_use_thread_per_consumer(create_query_generator) + + create_query = create_query_generator( + "kafka", + "key UInt64, value UInt64", + settings={ + "kafka_num_consumers": 100, + "kafka_thread_per_consumer": thread_per_consumer, + }, + ) + error = instance.query_and_get_error(create_query) + + assert ( + "BAD_ARGUMENTS" in error + and "The number of consumers can not be bigger than" in error ) - assert "BAD_ARGUMENTS" in error - instance.query( - """ + f""" SET kafka_disable_num_consumers_limit = 1; - CREATE TABLE test.kafka (key UInt64, value UInt64) - ENGINE = Kafka('{kafka_broker}:19092', '{kafka_topic_old}', '{kafka_group_name_old}', '{kafka_format_json_each_row}', '\\n', '', 100) - SETTINGS kafka_commit_on_select = 1; + {create_query}; """ ) instance.query("DROP TABLE test.kafka") -def test_format_with_prefix_and_suffix(kafka_cluster): - admin_client = KafkaAdminClient( - bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_format_with_prefix_and_suffix(kafka_cluster, create_query_generator): + topic_name = "custom" + get_topic_postfix(create_query_generator) + + with kafka_topic(get_admin_client(kafka_cluster), topic_name): + create_query = create_query_generator( + "kafka", + "key UInt64, value UInt64", + topic_list=topic_name, + consumer_group=topic_name, + format="CustomSeparated", + ) + instance.query( + f""" + DROP TABLE IF EXISTS test.kafka; + {create_query}; + """ + ) + + instance.query( + "INSERT INTO test.kafka select number*10 as key, number*100 as value from numbers(2) settings format_custom_result_before_delimiter='\n', format_custom_result_after_delimiter='\n'" + ) + + message_count = 2 + messages = kafka_consume_with_retry(kafka_cluster, topic_name, message_count) + + assert len(messages) == 2 + + assert ( + "".join(messages) + == "\n0\t0\n\n\n10\t100\n\n" + ) + + +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_max_rows_per_message(kafka_cluster, create_query_generator): + topic_name = "custom_max_rows_per_message" + get_topic_postfix( + create_query_generator ) - kafka_create_topic(admin_client, "custom") + with kafka_topic(get_admin_client(kafka_cluster), topic_name): + num_rows = 5 - instance.query( + create_query = create_query_generator( + "kafka", + "key UInt64, value UInt64", + topic_list=topic_name, + consumer_group=topic_name, + format="CustomSeparated", + settings={ + "format_custom_result_before_delimiter": "\n", + "format_custom_result_after_delimiter": "\n", + "kafka_max_rows_per_message": 3, + }, + ) + instance.query( + f""" + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.kafka; + {create_query}; + + CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY (key, value) AS + SELECT key, value FROM test.kafka; """ - DROP TABLE IF EXISTS test.kafka; + ) - CREATE TABLE test.kafka (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'custom', - kafka_group_name = 'custom', - kafka_format = 'CustomSeparated'; - """ - ) + instance.query( + f"INSERT INTO test.kafka select number*10 as key, number*100 as value from numbers({num_rows}) settings format_custom_result_before_delimiter='\n', format_custom_result_after_delimiter='\n'" + ) - instance.query( - "INSERT INTO test.kafka select number*10 as key, number*100 as value from numbers(2) settings format_custom_result_before_delimiter='\n', format_custom_result_after_delimiter='\n'" - ) + message_count = 2 + messages = kafka_consume_with_retry(kafka_cluster, topic_name, message_count) - messages = [] + assert len(messages) == message_count - attempt = 0 - while attempt < 100: - messages.extend(kafka_consume(kafka_cluster, "custom")) - if len(messages) == 2: - break - attempt += 1 + assert ( + "".join(messages) + == "\n0\t0\n10\t100\n20\t200\n\n\n30\t300\n40\t400\n\n" + ) - assert len(messages) == 2 + instance.query_with_retry( + "SELECT count() FROM test.view", + check_callback=lambda res: int(res) == num_rows, + ) - assert ( - "".join(messages) == "\n0\t0\n\n\n10\t100\n\n" - ) - - kafka_delete_topic(admin_client, "custom") + result = instance.query("SELECT * FROM test.view") + assert result == "0\t0\n10\t100\n20\t200\n30\t300\n40\t400\n" -def test_max_rows_per_message(kafka_cluster): - admin_client = KafkaAdminClient( - bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) - ) - - topic = "custom_max_rows_per_message" - - kafka_create_topic(admin_client, topic) - - num_rows = 5 - - instance.query( - f""" - DROP TABLE IF EXISTS test.view; - DROP TABLE IF EXISTS test.kafka; - - CREATE TABLE test.kafka (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = '{topic}', - kafka_group_name = '{topic}', - kafka_format = 'CustomSeparated', - format_custom_result_before_delimiter = '\n', - format_custom_result_after_delimiter = '\n', - kafka_max_rows_per_message = 3; - - CREATE MATERIALIZED VIEW test.view Engine=Log AS - SELECT key, value FROM test.kafka; - """ - ) - - instance.query( - f"INSERT INTO test.kafka select number*10 as key, number*100 as value from numbers({num_rows}) settings format_custom_result_before_delimiter='\n', format_custom_result_after_delimiter='\n'" - ) - - messages = [] - - attempt = 0 - while attempt < 500: - messages.extend(kafka_consume(kafka_cluster, topic)) - if len(messages) == 2: - break - attempt += 1 - - assert len(messages) == 2 - - assert ( - "".join(messages) - == "\n0\t0\n10\t100\n20\t200\n\n\n30\t300\n40\t400\n\n" - ) - - attempt = 0 - rows = 0 - while attempt < 500: - rows = int(instance.query("SELECT count() FROM test.view")) - if rows == num_rows: - break - attempt += 1 - - assert rows == num_rows - - result = instance.query("SELECT * FROM test.view") - assert result == "0\t0\n10\t100\n20\t200\n30\t300\n40\t400\n" - - kafka_delete_topic(admin_client, topic) - - -def test_row_based_formats(kafka_cluster): - admin_client = KafkaAdminClient( - bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) - ) +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_row_based_formats(kafka_cluster, create_query_generator): + admin_client = get_admin_client(kafka_cluster) for format_name in [ "TSV", @@ -4454,121 +4849,111 @@ def test_row_based_formats(kafka_cluster): "RowBinaryWithNamesAndTypes", "MsgPack", ]: - print(format_name) + logging.debug("Checking {format_name}") - kafka_create_topic(admin_client, format_name) + topic_name = format_name + get_topic_postfix(create_query_generator) + table_name = f"kafka_{format_name}" - num_rows = 10 + with kafka_topic(admin_client, topic_name): + num_rows = 10 + max_rows_per_message = 5 + message_count = num_rows / max_rows_per_message + create_query = create_query_generator( + table_name, + "key UInt64, value UInt64", + topic_list=topic_name, + consumer_group=topic_name, + format=format_name, + settings={"kafka_max_rows_per_message": max_rows_per_message}, + ) + + instance.query( + f""" + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.{table_name}; + + {create_query}; + + CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY (key, value) AS + SELECT key, value FROM test.{table_name}; + + INSERT INTO test.{table_name} SELECT number * 10 as key, number * 100 as value FROM numbers({num_rows}); + """ + ) + + messages = kafka_consume_with_retry( + kafka_cluster, topic_name, message_count, need_decode=False + ) + + assert len(messages) == message_count + + instance.query_with_retry( + "SELECT count() FROM test.view", + check_callback=lambda res: int(res) == num_rows, + ) + + result = instance.query("SELECT * FROM test.view") + expected = "" + for i in range(num_rows): + expected += str(i * 10) + "\t" + str(i * 100) + "\n" + assert result == expected + + +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_block_based_formats_1(kafka_cluster, create_query_generator): + topic_name = "pretty_space" + get_topic_postfix(create_query_generator) + + with kafka_topic(get_admin_client(kafka_cluster), topic_name): + create_query = create_query_generator( + "kafka", + "key UInt64, value UInt64", + topic_list=topic_name, + consumer_group=topic_name, + format="PrettySpace", + ) instance.query( f""" - DROP TABLE IF EXISTS test.view; DROP TABLE IF EXISTS test.kafka; - CREATE TABLE test.kafka (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = '{format_name}', - kafka_group_name = '{format_name}', - kafka_format = '{format_name}', - kafka_max_rows_per_message = 5; + {create_query}; - CREATE MATERIALIZED VIEW test.view Engine=Log AS - SELECT key, value FROM test.kafka; - - INSERT INTO test.kafka SELECT number * 10 as key, number * 100 as value FROM numbers({num_rows}); + INSERT INTO test.kafka SELECT number * 10 as key, number * 100 as value FROM numbers(5) settings max_block_size=2, optimize_trivial_insert_select=0, output_format_pretty_color=1, output_format_pretty_row_numbers=0; """ ) - messages = [] + message_count = 3 + messages = kafka_consume_with_retry(kafka_cluster, topic_name, message_count) + assert len(messages) == 3 - attempt = 0 - while attempt < 500: - messages.extend(kafka_consume(kafka_cluster, format_name, needDecode=False)) - if len(messages) == 2: - break - attempt += 1 + data = [] + for message in messages: + splitted = message.split("\n") + assert splitted[0] == " \x1b[1mkey\x1b[0m \x1b[1mvalue\x1b[0m" + assert splitted[1] == "" + assert splitted[-1] == "" + data += [line.split() for line in splitted[2:-1]] - assert len(messages) == 2 - - attempt = 0 - rows = 0 - while attempt < 500: - rows = int(instance.query("SELECT count() FROM test.view")) - if rows == num_rows: - break - attempt += 1 - - assert rows == num_rows - - result = instance.query("SELECT * FROM test.view") - expected = "" - for i in range(num_rows): - expected += str(i * 10) + "\t" + str(i * 100) + "\n" - assert result == expected - - kafka_delete_topic(admin_client, format_name) + assert data == [ + ["0", "0"], + ["10", "100"], + ["20", "200"], + ["30", "300"], + ["40", "400"], + ] -def test_block_based_formats_1(kafka_cluster): - admin_client = KafkaAdminClient( - bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) - ) - - topic = "pretty_space" - kafka_create_topic(admin_client, topic) - - instance.query( - f""" - DROP TABLE IF EXISTS test.kafka; - - CREATE TABLE test.kafka (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = '{topic}', - kafka_group_name = '{topic}', - kafka_format = 'PrettySpace'; - - INSERT INTO test.kafka SELECT number * 10 as key, number * 100 as value FROM numbers(5) settings max_block_size=2, optimize_trivial_insert_select=0, output_format_pretty_color=1, output_format_pretty_row_numbers=0; - """ - ) - - messages = [] - - attempt = 0 - while attempt < 500: - messages.extend(kafka_consume(kafka_cluster, topic)) - if len(messages) == 3: - break - attempt += 1 - - assert len(messages) == 3 - - data = [] - for message in messages: - splitted = message.split("\n") - assert splitted[0] == " \x1b[1mkey\x1b[0m \x1b[1mvalue\x1b[0m" - assert splitted[1] == "" - assert splitted[-1] == "" - data += [line.split() for line in splitted[2:-1]] - - assert data == [ - ["0", "0"], - ["10", "100"], - ["20", "200"], - ["30", "300"], - ["40", "400"], - ] - - kafka_delete_topic(admin_client, topic) - - -def test_block_based_formats_2(kafka_cluster): - admin_client = KafkaAdminClient( - bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) - ) - +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_block_based_formats_2(kafka_cluster, create_query_generator): + admin_client = get_admin_client(kafka_cluster) num_rows = 100 + message_count = 9 for format_name in [ "JSONColumns", @@ -4578,55 +4963,50 @@ def test_block_based_formats_2(kafka_cluster): "ORC", "JSONCompactColumns", ]: - kafka_create_topic(admin_client, format_name) + topic_name = format_name + get_topic_postfix(create_query_generator) + table_name = f"kafka_{format_name}" + logging.debug(f"Checking format {format_name}") + with kafka_topic(admin_client, topic_name): + create_query = create_query_generator( + table_name, + "key UInt64, value UInt64", + topic_list=topic_name, + consumer_group=topic_name, + format=format_name, + ) - instance.query( - f""" - DROP TABLE IF EXISTS test.view; - DROP TABLE IF EXISTS test.kafka; + instance.query( + f""" + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.{table_name}; - CREATE TABLE test.kafka (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = '{format_name}', - kafka_group_name = '{format_name}', - kafka_format = '{format_name}'; + {create_query}; - CREATE MATERIALIZED VIEW test.view Engine=Log AS - SELECT key, value FROM test.kafka; + CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY (key, value) AS + SELECT key, value FROM test.{table_name}; - INSERT INTO test.kafka SELECT number * 10 as key, number * 100 as value FROM numbers({num_rows}) settings max_block_size=12, optimize_trivial_insert_select=0; - """ - ) + INSERT INTO test.{table_name} SELECT number * 10 as key, number * 100 as value FROM numbers({num_rows}) settings max_block_size=12, optimize_trivial_insert_select=0; + """ + ) + messages = kafka_consume_with_retry( + kafka_cluster, topic_name, message_count, need_decode=False + ) + assert len(messages) == message_count - messages = [] + rows = int( + instance.query_with_retry( + "SELECT count() FROM test.view", + check_callback=lambda res: int(res) == num_rows, + ) + ) - attempt = 0 - while attempt < 500: - messages.extend(kafka_consume(kafka_cluster, format_name, needDecode=False)) - if len(messages) == 9: - break - attempt += 1 + assert rows == num_rows - assert len(messages) == 9 - - attempt = 0 - rows = 0 - while attempt < 500: - rows = int(instance.query("SELECT count() FROM test.view")) - if rows == num_rows: - break - attempt += 1 - - assert rows == num_rows - - result = instance.query("SELECT * FROM test.view ORDER by key") - expected = "" - for i in range(num_rows): - expected += str(i * 10) + "\t" + str(i * 100) + "\n" - assert result == expected - - kafka_delete_topic(admin_client, format_name) + result = instance.query("SELECT * FROM test.view ORDER by key") + expected = "" + for i in range(num_rows): + expected += str(i * 10) + "\t" + str(i * 100) + "\n" + assert result == expected def test_system_kafka_consumers(kafka_cluster): @@ -4962,137 +5342,186 @@ def test_formats_errors(kafka_cluster): "HiveText", "MySQLDump", ]: - kafka_create_topic(admin_client, format_name) - table_name = f"kafka_{format_name}" + with kafka_topic(admin_client, format_name): + table_name = f"kafka_{format_name}" + instance.query( + f""" + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.{table_name}; + + CREATE TABLE test.{table_name} (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = '{format_name}', + kafka_group_name = '{format_name}', + kafka_format = '{format_name}', + kafka_max_rows_per_message = 5, + format_template_row='template_row.format', + format_regexp='id: (.+?)', + input_format_with_names_use_header=0, + format_schema='key_value_message:Message'; + + CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY (key, value) AS + SELECT key, value FROM test.{table_name}; + """ + ) + + kafka_produce( + kafka_cluster, + format_name, + ["Broken message\nBroken message\nBroken message\n"], + ) + + num_errors = int( + instance.query_with_retry( + f"SELECT length(exceptions.text) from system.kafka_consumers where database = 'test' and table = '{table_name}'", + check_callback=lambda res: int(res) > 0, + ) + ) + + assert num_errors > 0 + + instance.query(f"DROP TABLE test.{table_name}") + instance.query("DROP TABLE test.view") + + +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_multiple_read_in_materialized_views(kafka_cluster, create_query_generator): + topic_name = "multiple_read_from_mv" + get_topic_postfix(create_query_generator) + + with kafka_topic(get_admin_client(kafka_cluster), topic_name): + create_query = create_query_generator( + "kafka_multiple_read_input", + "id Int64", + topic_list=topic_name, + consumer_group=topic_name, + ) instance.query( f""" - DROP TABLE IF EXISTS test.view; - DROP TABLE IF EXISTS test.{table_name}; + DROP TABLE IF EXISTS test.kafka_multiple_read_input SYNC; + DROP TABLE IF EXISTS test.kafka_multiple_read_table; + DROP TABLE IF EXISTS test.kafka_multiple_read_mv; - CREATE TABLE test.{table_name} (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = '{format_name}', - kafka_group_name = '{format_name}', - kafka_format = '{format_name}', - kafka_max_rows_per_message = 5, - format_template_row='template_row.format', - format_regexp='id: (.+?)', - input_format_with_names_use_header=0, - format_schema='key_value_message:Message'; + {create_query}; - CREATE MATERIALIZED VIEW test.view Engine=Log AS - SELECT key, value FROM test.{table_name}; - """ + CREATE TABLE test.kafka_multiple_read_table (id Int64) + ENGINE = MergeTree + ORDER BY id; + + + CREATE MATERIALIZED VIEW test.kafka_multiple_read_mv TO test.kafka_multiple_read_table AS + SELECT id + FROM test.kafka_multiple_read_input + WHERE id NOT IN ( + SELECT id + FROM test.kafka_multiple_read_table + WHERE id IN ( + SELECT id + FROM test.kafka_multiple_read_input + ) + ); + """ ) kafka_produce( - kafka_cluster, - format_name, - ["Broken message\nBroken message\nBroken message\n"], + kafka_cluster, topic_name, [json.dumps({"id": 42}), json.dumps({"id": 43})] ) - attempt = 0 - num_errors = 0 - while attempt < 200: - num_errors = int( - instance.query( - f"SELECT length(exceptions.text) from system.kafka_consumers where database = 'test' and table = '{table_name}'" - ) - ) - if num_errors > 0: - break - attempt += 1 + expected_result = "42\n43\n" + res = instance.query_with_retry( + f"SELECT id FROM test.kafka_multiple_read_table ORDER BY id", + check_callback=lambda res: res == expected_result, + ) + assert res == expected_result - assert num_errors > 0 + # Verify that the query deduplicates the records as it meant to be + messages = [] + for _ in range(0, 10): + messages.append(json.dumps({"id": 42})) + messages.append(json.dumps({"id": 43})) - kafka_delete_topic(admin_client, format_name) - instance.query(f"DROP TABLE test.{table_name}") - instance.query("DROP TABLE test.view") + messages.append(json.dumps({"id": 44})) + + kafka_produce(kafka_cluster, topic_name, messages) + + expected_result = "42\n43\n44\n" + res = instance.query_with_retry( + f"SELECT id FROM test.kafka_multiple_read_table ORDER BY id", + check_callback=lambda res: res == expected_result, + ) + assert res == expected_result + + instance.query( + f""" + DROP TABLE test.kafka_multiple_read_input; + DROP TABLE test.kafka_multiple_read_table; + DROP TABLE test.kafka_multiple_read_mv; + """ + ) -def test_multiple_read_in_materialized_views(kafka_cluster, max_retries=15): - admin_client = KafkaAdminClient( - bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) - ) - - topic = "multiple_read_from_mv" - kafka_create_topic(admin_client, topic) +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_kafka_null_message(kafka_cluster, create_query_generator): + topic_name = "null_message" instance.query( f""" - DROP TABLE IF EXISTS test.kafka_multiple_read_input; - DROP TABLE IF EXISTS test.kafka_multiple_read_table; - DROP TABLE IF EXISTS test.kafka_multiple_read_mv; + DROP TABLE IF EXISTS test.null_message_view; + DROP TABLE IF EXISTS test.null_message_consumer; + DROP TABLE IF EXISTS test.null_message_kafka; - CREATE TABLE test.kafka_multiple_read_input (id Int64) - ENGINE = Kafka - SETTINGS - kafka_broker_list = 'kafka1:19092', - kafka_topic_list = '{topic}', - kafka_group_name = '{topic}', - kafka_format = 'JSONEachRow'; + {create_query_generator("null_message_kafka", "value UInt64", topic_list=topic_name, consumer_group="mv")}; + CREATE TABLE test.null_message_view (value UInt64) + ENGINE = MergeTree() + ORDER BY value; + CREATE MATERIALIZED VIEW test.null_message_consumer TO test.null_message_view AS + SELECT * FROM test.null_message_kafka; + """ + ) - CREATE TABLE test.kafka_multiple_read_table (id Int64) - ENGINE = MergeTree - ORDER BY id; + message_key_values = [] + for i in range(5): + # Here the key is key for Kafka message + message = json.dumps({"value": i}) if i != 3 else None + message_key_values.append({"key": f"{i}".encode(), "message": message}) + producer = get_kafka_producer(kafka_cluster.kafka_port, producer_serializer, 15) + for message_kv in message_key_values: + producer.send( + topic=topic_name, key=message_kv["key"], value=message_kv["message"] + ) + producer.flush() - CREATE MATERIALIZED VIEW IF NOT EXISTS test.kafka_multiple_read_mv TO test.kafka_multiple_read_table AS - SELECT id - FROM test.kafka_multiple_read_input - WHERE id NOT IN ( - SELECT id - FROM test.kafka_multiple_read_table - WHERE id IN ( - SELECT id - FROM test.kafka_multiple_read_input - ) - ); + expected = TSV( """ +0 +1 +2 +4 +""" ) + with existing_kafka_topic(get_admin_client(kafka_cluster), topic_name): + result = instance.query_with_retry( + "SELECT * FROM test.null_message_view", + check_callback=lambda res: TSV(res) == expected, + ) - kafka_produce( - kafka_cluster, topic, [json.dumps({"id": 42}), json.dumps({"id": 43})] - ) + assert expected == TSV(result) - expected_result = "42\n43\n" - res = instance.query_with_retry( - f"SELECT id FROM test.kafka_multiple_read_table ORDER BY id", - retry_count=30, - sleep_time=0.5, - check_callback=lambda res: res == expected_result, - ) - assert res == expected_result - - # Verify that the query deduplicates the records as it meant to be - messages = [] - for i in range(0, 10): - messages.append(json.dumps({"id": 42})) - messages.append(json.dumps({"id": 43})) - - messages.append(json.dumps({"id": 44})) - - kafka_produce(kafka_cluster, topic, messages) - - expected_result = "42\n43\n44\n" - res = instance.query_with_retry( - f"SELECT id FROM test.kafka_multiple_read_table ORDER BY id", - retry_count=30, - sleep_time=0.5, - check_callback=lambda res: res == expected_result, - ) - assert res == expected_result - - kafka_delete_topic(admin_client, topic) - instance.query( - f""" - DROP TABLE test.kafka_multiple_read_input; - DROP TABLE test.kafka_multiple_read_table; - DROP TABLE test.kafka_multiple_read_mv; + instance.query( + """ + DROP TABLE test.null_message_consumer SYNC; + DROP TABLE test.null_message_view; + DROP TABLE test.null_message_kafka SYNC; """ - ) + ) if __name__ == "__main__": diff --git a/tests/integration/test_storage_kafka/test_produce_http_interface.py b/tests/integration/test_storage_kafka/test_produce_http_interface.py new file mode 100644 index 00000000000..fc10a07f239 --- /dev/null +++ b/tests/integration/test_storage_kafka/test_produce_http_interface.py @@ -0,0 +1,243 @@ +import time +import logging + +import pytest +from helpers.cluster import ClickHouseCluster, is_arm +from helpers.test_tools import TSV +from kafka import KafkaAdminClient +from kafka.admin import NewTopic + +if is_arm(): + pytestmark = pytest.mark.skip + +cluster = ClickHouseCluster(__file__) +instance = cluster.add_instance( + "instance", + main_configs=["configs/kafka.xml", "configs/named_collection.xml"], + user_configs=["configs/users.xml"], + with_kafka=True, + with_zookeeper=True, # For Replicated Table + macros={ + "kafka_broker": "kafka1", + "kafka_topic_old": "old", + "kafka_group_name_old": "old", + "kafka_topic_new": "new", + "kafka_group_name_new": "new", + "kafka_client_id": "instance", + "kafka_format_json_each_row": "JSONEachRow", + }, + clickhouse_path_dir="clickhouse_path", +) + + +@pytest.fixture(scope="module") +def kafka_cluster(): + try: + cluster.start() + kafka_id = instance.cluster.kafka_docker_id + print(("kafka_id is {}".format(kafka_id))) + yield cluster + finally: + cluster.shutdown() + + +@pytest.fixture(autouse=True) +def kafka_setup_teardown(): + instance.query("DROP DATABASE IF EXISTS test; CREATE DATABASE test;") + # logging.debug("kafka is available - running test") + yield # run test + + +def kafka_create_topic( + admin_client, + topic_name, + num_partitions=1, + replication_factor=1, + max_retries=50, + config=None, +): + logging.debug( + f"Kafka create topic={topic_name}, num_partitions={num_partitions}, replication_factor={replication_factor}" + ) + topics_list = [ + NewTopic( + name=topic_name, + num_partitions=num_partitions, + replication_factor=replication_factor, + topic_configs=config, + ) + ] + retries = 0 + while True: + try: + admin_client.create_topics(new_topics=topics_list, validate_only=False) + logging.debug("Admin client succeed") + return + except Exception as e: + retries += 1 + time.sleep(0.5) + if retries < max_retries: + logging.warning(f"Failed to create topic {e}") + else: + raise + + +def kafka_delete_topic(admin_client, topic, max_retries=50): + result = admin_client.delete_topics([topic]) + for topic, e in result.topic_error_codes: + if e == 0: + logging.debug(f"Topic {topic} deleted") + else: + logging.error(f"Failed to delete topic {topic}: {e}") + + retries = 0 + while True: + topics_listed = admin_client.list_topics() + logging.debug(f"TOPICS LISTED: {topics_listed}") + if topic not in topics_listed: + return + else: + retries += 1 + time.sleep(0.5) + if retries > max_retries: + raise Exception(f"Failed to delete topics {topic}, {result}") + + +def test_kafka_produce_http_interface_row_based_format(kafka_cluster): + # reproduction of #61060 with validating the written messages + admin_client = KafkaAdminClient( + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) + ) + + topic_prefix = "http_row_" + + # It is important to have: + # - long enough messages + # - enough messages + # I don't know the exact requirement for message sizes, but it doesn't reproduce with short messages + # For the number of messages it seems like at least 3 messages is necessary + expected_key = "01234567890123456789" + expected_value = "aaaaabbbbbccccc" + + insert_query_end = f"(key, value) VALUES ('{expected_key}', '{expected_value}'), ('{expected_key}', '{expected_value}'), ('{expected_key}', '{expected_value}')" + insert_query_template = "INSERT INTO {table_name} " + insert_query_end + + extra_settings = { + "Protobuf": ", kafka_schema = 'string_key_value.proto:StringKeyValuePair'", + "CapnProto": ", kafka_schema='string_key_value:StringKeyValuePair'", + "Template": ", format_template_row='string_key_value.format'", + } + + # Only the formats that can be used both and input and output format are tested + # Reasons to exclude following formats: + # - JSONStrings: not actually an input format + # - ProtobufSingle: I cannot make it work to parse the messages. Probably something is broken, + # because the producer can write multiple rows into a same message, which makes them impossible to parse properly. Should added after #67549 is fixed. + # - ProtobufList: I didn't want to deal with the envelope and stuff + # - Npy: supports only single column + # - LineAsString: supports only single column + # - RawBLOB: supports only single column + formats_to_test = [ + "TabSeparated", + "TabSeparatedRaw", + "TabSeparatedWithNames", + "TabSeparatedWithNamesAndTypes", + "TabSeparatedRawWithNames", + "TabSeparatedRawWithNamesAndTypes", + "Template", + "CSV", + "CSVWithNames", + "CSVWithNamesAndTypes", + "CustomSeparated", + "CustomSeparatedWithNames", + "CustomSeparatedWithNamesAndTypes", + "Values", + "JSON", + "JSONColumns", + "JSONColumnsWithMetadata", + "JSONCompact", + "JSONCompactColumns", + "JSONEachRow", + "JSONStringsEachRow", + "JSONCompactEachRow", + "JSONCompactEachRowWithNames", + "JSONCompactEachRowWithNamesAndTypes", + "JSONCompactStringsEachRow", + "JSONCompactStringsEachRowWithNames", + "JSONCompactStringsEachRowWithNamesAndTypes", + "JSONObjectEachRow", + "BSONEachRow", + "TSKV", + "Protobuf", + "Avro", + "Parquet", + "Arrow", + "ArrowStream", + "ORC", + "RowBinary", + "RowBinaryWithNames", + "RowBinaryWithNamesAndTypes", + "Native", + "CapnProto", + "MsgPack", + ] + for format in formats_to_test: + logging.debug(f"Creating tables and writing messages to {format}") + topic = topic_prefix + format + kafka_create_topic(admin_client, topic) + + extra_setting = extra_settings.get(format, "") + + # kafka_max_rows_per_message is set to 2 to make sure every format produces at least 2 messages, thus increasing the chance of catching a bug + instance.query( + f""" + DROP TABLE IF EXISTS test.view_{topic}; + DROP TABLE IF EXISTS test.consumer_{topic}; + CREATE TABLE test.kafka_writer_{topic} (key String, value String) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = '{topic}', + kafka_group_name = '{topic}', + kafka_format = '{format}', + kafka_max_rows_per_message = 2 {extra_setting}; + + CREATE TABLE test.kafka_{topic} (key String, value String) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = '{topic}', + kafka_group_name = '{topic}', + kafka_format = '{format}' {extra_setting}; + + CREATE MATERIALIZED VIEW test.view_{topic} Engine=Log AS + SELECT key, value FROM test.kafka_{topic}; + """ + ) + instance.http_query( + insert_query_template.format(table_name="test.kafka_writer_" + topic), + method="POST", + ) + + expected = f"""\ +{expected_key}\t{expected_value} +{expected_key}\t{expected_value} +{expected_key}\t{expected_value} +""" + # give some times for the readers to read the messages + for format in formats_to_test: + logging.debug(f"Checking result for {format}") + topic = topic_prefix + format + + result = instance.query_with_retry( + f"SELECT * FROM test.view_{topic}", + check_callback=lambda res: res.count("\n") == 3, + ) + + assert TSV(result) == TSV(expected) + + kafka_delete_topic(admin_client, topic) + + +if __name__ == "__main__": + cluster.start() + input("Cluster created, press any key to destroy...") + cluster.shutdown() diff --git a/tests/integration/test_storage_mysql/test.py b/tests/integration/test_storage_mysql/test.py index 5948954ff5f..c724c5bb498 100644 --- a/tests/integration/test_storage_mysql/test.py +++ b/tests/integration/test_storage_mysql/test.py @@ -445,7 +445,7 @@ def test_mysql_distributed(started_cluster): query = "SELECT * FROM (" for i in range(3): query += "SELECT name FROM test_replicas UNION DISTINCT " - query += "SELECT name FROM test_replicas)" + query += "SELECT name FROM test_replicas) ORDER BY name" result = node2.query(query) assert result == "host2\nhost3\nhost4\n" @@ -827,6 +827,9 @@ def test_settings(started_cluster): f"with settings: connect_timeout={connect_timeout}, read_write_timeout={rw_timeout}" ) + node1.query("DROP DATABASE IF EXISTS m") + node1.query("DROP DATABASE IF EXISTS mm") + rw_timeout = 40123001 connect_timeout = 40123002 node1.query( @@ -855,6 +858,9 @@ def test_settings(started_cluster): f"with settings: connect_timeout={connect_timeout}, read_write_timeout={rw_timeout}" ) + node1.query("DROP DATABASE m") + node1.query("DROP DATABASE mm") + drop_mysql_table(conn, table_name) conn.close() @@ -930,6 +936,9 @@ def test_joins(started_cluster): conn.commit() + node1.query("DROP TABLE IF EXISTS test_joins_table_users") + node1.query("DROP TABLE IF EXISTS test_joins_table_tickets") + node1.query( """ CREATE TABLE test_joins_table_users @@ -964,6 +973,9 @@ def test_joins(started_cluster): """ ) == "281607\tFeedback\t2024-06-25 12:09:41\tuser@example.com\n" + node1.query("DROP TABLE test_joins_table_users") + node1.query("DROP TABLE test_joins_table_tickets") + if __name__ == "__main__": with contextmanager(started_cluster)() as cluster: diff --git a/tests/integration/test_storage_s3_queue/test.py b/tests/integration/test_storage_s3_queue/test.py index 92d6f181464..9e3ee19179a 100644 --- a/tests/integration/test_storage_s3_queue/test.py +++ b/tests/integration/test_storage_s3_queue/test.py @@ -1,6 +1,7 @@ import io import logging import random +import string import time import pytest @@ -13,7 +14,6 @@ from uuid import uuid4 AVAILABLE_MODES = ["unordered", "ordered"] DEFAULT_AUTH = ["'minio'", "'minio123'"] NO_AUTH = ["NOSIGN"] -AZURE_CONTAINER_NAME = "cont" def prepare_public_s3_bucket(started_cluster): @@ -68,13 +68,24 @@ def s3_queue_setup_teardown(started_cluster): instance = started_cluster.instances["instance"] instance_2 = started_cluster.instances["instance2"] - instance.query("DROP DATABASE IF EXISTS test; CREATE DATABASE test;") - instance_2.query("DROP DATABASE IF EXISTS test; CREATE DATABASE test;") + instance.query("DROP DATABASE IF EXISTS default; CREATE DATABASE default;") + instance_2.query("DROP DATABASE IF EXISTS default; CREATE DATABASE default;") minio = started_cluster.minio_client objects = list(minio.list_objects(started_cluster.minio_bucket, recursive=True)) for obj in objects: minio.remove_object(started_cluster.minio_bucket, obj.object_name) + + container_client = started_cluster.blob_service_client.get_container_client( + started_cluster.azurite_container + ) + + if container_client.exists(): + blob_names = [b.name for b in container_client.list_blobs()] + logging.debug(f"Deleting blobs: {blob_names}") + for b in blob_names: + container_client.delete_blob(b) + yield # run test @@ -129,11 +140,6 @@ def started_cluster(): cluster.start() logging.info("Cluster started") - container_client = cluster.blob_service_client.get_container_client( - AZURE_CONTAINER_NAME - ) - container_client.create_container() - yield cluster finally: cluster.shutdown() @@ -190,7 +196,7 @@ def put_s3_file_content(started_cluster, filename, data, bucket=None): def put_azure_file_content(started_cluster, filename, data, bucket=None): client = started_cluster.blob_service_client.get_blob_client( - AZURE_CONTAINER_NAME, filename + started_cluster.azurite_container, filename ) buf = io.BytesIO(data) client.upload_blob(buf, "BlockBlob", len(data)) @@ -226,7 +232,7 @@ def create_table( url = f"http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{files_path}/" engine_def = f"{engine_name}('{url}', {auth_params}, {file_format})" else: - engine_def = f"{engine_name}('{started_cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', '{files_path}/', 'CSV')" + engine_def = f"{engine_name}('{started_cluster.env_variables['AZURITE_CONNECTION_STRING']}', '{started_cluster.azurite_container}', '{files_path}/', 'CSV')" node.query(f"DROP TABLE IF EXISTS {table_name}") create_query = f""" @@ -262,15 +268,21 @@ def create_mv( ) +def generate_random_string(length=6): + return "".join(random.choice(string.ascii_lowercase) for i in range(length)) + + @pytest.mark.parametrize("mode", ["unordered", "ordered"]) @pytest.mark.parametrize("engine_name", ["S3Queue", "AzureQueue"]) def test_delete_after_processing(started_cluster, mode, engine_name): node = started_cluster.instances["instance"] - table_name = f"test.delete_after_processing_{mode}_{engine_name}" + table_name = f"delete_after_processing_{mode}_{engine_name}" dst_table_name = f"{table_name}_dst" files_path = f"{table_name}_data" files_num = 5 row_num = 10 + # A unique path is necessary for repeatable tests + keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}" if engine_name == "S3Queue": storage = "s3" else: @@ -285,7 +297,7 @@ def test_delete_after_processing(started_cluster, mode, engine_name): table_name, mode, files_path, - additional_settings={"after_processing": "delete"}, + additional_settings={"after_processing": "delete", "keeper_path": keeper_path}, engine_name=engine_name, ) create_mv(node, table_name, dst_table_name) @@ -313,7 +325,7 @@ def test_delete_after_processing(started_cluster, mode, engine_name): assert len(objects) == 0 else: client = started_cluster.blob_service_client.get_container_client( - AZURE_CONTAINER_NAME + started_cluster.azurite_container ) objects_iterator = client.list_blobs(files_path) for objects in objects_iterator: @@ -324,11 +336,12 @@ def test_delete_after_processing(started_cluster, mode, engine_name): @pytest.mark.parametrize("engine_name", ["S3Queue", "AzureQueue"]) def test_failed_retry(started_cluster, mode, engine_name): node = started_cluster.instances["instance"] - table_name = f"test.failed_retry_{mode}_{engine_name}" + table_name = f"failed_retry_{mode}_{engine_name}" dst_table_name = f"{table_name}_dst" files_path = f"{table_name}_data" file_path = f"{files_path}/trash_test.csv" - keeper_path = f"/clickhouse/test_{table_name}" + # A unique path is necessary for repeatable tests + keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}" retries_num = 3 values = [ @@ -385,8 +398,9 @@ def test_failed_retry(started_cluster, mode, engine_name): @pytest.mark.parametrize("mode", AVAILABLE_MODES) def test_direct_select_file(started_cluster, mode): node = started_cluster.instances["instance"] - table_name = f"test.direct_select_file_{mode}" - keeper_path = f"/clickhouse/test_{table_name}" + table_name = f"direct_select_file_{mode}" + # A unique path is necessary for repeatable tests + keeper_path = f"/clickhouse/test_{table_name}_{mode}_{generate_random_string()}" files_path = f"{table_name}_data" file_path = f"{files_path}/test.csv" @@ -447,7 +461,7 @@ def test_direct_select_file(started_cluster, mode): ] == [] # New table with different zookeeper path - keeper_path = f"/clickhouse/test_{table_name}_{mode}_2" + keeper_path = f"{keeper_path}_2" create_table( started_cluster, node, @@ -491,8 +505,17 @@ def test_direct_select_multiple_files(started_cluster, mode): node = started_cluster.instances["instance"] table_name = f"direct_select_multiple_files_{mode}" files_path = f"{table_name}_data" + # A unique path is necessary for repeatable tests + keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}" - create_table(started_cluster, node, table_name, mode, files_path) + create_table( + started_cluster, + node, + table_name, + mode, + files_path, + additional_settings={"keeper_path": keeper_path}, + ) for i in range(5): rand_values = [[random.randint(0, 50) for _ in range(3)] for _ in range(10)] values_csv = ( @@ -515,14 +538,23 @@ def test_direct_select_multiple_files(started_cluster, mode): @pytest.mark.parametrize("mode", AVAILABLE_MODES) -def test_streaming_to_view_(started_cluster, mode): +def test_streaming_to_view(started_cluster, mode): node = started_cluster.instances["instance"] table_name = f"streaming_to_view_{mode}" dst_table_name = f"{table_name}_dst" files_path = f"{table_name}_data" + # A unique path is necessary for repeatable tests + keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}" total_values = generate_random_files(started_cluster, files_path, 10) - create_table(started_cluster, node, table_name, mode, files_path) + create_table( + started_cluster, + node, + table_name, + mode, + files_path, + additional_settings={"keeper_path": keeper_path}, + ) create_mv(node, table_name, dst_table_name) expected_values = set([tuple(i) for i in total_values]) @@ -544,7 +576,8 @@ def test_streaming_to_many_views(started_cluster, mode): node = started_cluster.instances["instance"] table_name = f"streaming_to_many_views_{mode}" dst_table_name = f"{table_name}_dst" - keeper_path = f"/clickhouse/test_{table_name}" + # A unique path is necessary for repeatable tests + keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}" files_path = f"{table_name}_data" for i in range(3): @@ -582,7 +615,8 @@ def test_streaming_to_many_views(started_cluster, mode): def test_multiple_tables_meta_mismatch(started_cluster): node = started_cluster.instances["instance"] table_name = f"multiple_tables_meta_mismatch" - keeper_path = f"/clickhouse/test_{table_name}" + # A unique path is necessary for repeatable tests + keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}" files_path = f"{table_name}_data" create_table( @@ -675,7 +709,8 @@ def test_multiple_tables_streaming_sync(started_cluster, mode): node = started_cluster.instances["instance"] table_name = f"multiple_tables_streaming_sync_{mode}" dst_table_name = f"{table_name}_dst" - keeper_path = f"/clickhouse/test_{table_name}" + # A unique path is necessary for repeatable tests + keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}" files_path = f"{table_name}_data" files_to_generate = 300 @@ -756,7 +791,10 @@ def test_multiple_tables_streaming_sync(started_cluster, mode): def test_multiple_tables_streaming_sync_distributed(started_cluster, mode): node = started_cluster.instances["instance"] node_2 = started_cluster.instances["instance2"] - table_name = f"multiple_tables_streaming_sync_distributed_{mode}" + # A unique table name is necessary for repeatable tests + table_name = ( + f"multiple_tables_streaming_sync_distributed_{mode}_{generate_random_string()}" + ) dst_table_name = f"{table_name}_dst" keeper_path = f"/clickhouse/test_{table_name}" files_path = f"{table_name}_data" @@ -771,7 +809,11 @@ def test_multiple_tables_streaming_sync_distributed(started_cluster, mode): table_name, mode, files_path, - additional_settings={"keeper_path": keeper_path, "s3queue_buckets": 2}, + additional_settings={ + "keeper_path": keeper_path, + "s3queue_buckets": 2, + **({"s3queue_processing_threads_num": 1} if mode == "ordered" else {}), + }, ) for instance in [node, node_2]: @@ -806,6 +848,10 @@ def test_multiple_tables_streaming_sync_distributed(started_cluster, mode): list(map(int, l.split())) for l in run_query(node_2, get_query).splitlines() ] + logging.debug( + f"res1 size: {len(res1)}, res2 size: {len(res2)}, total_rows: {total_rows}" + ) + assert len(res1) + len(res2) == total_rows # Checking that all engines have made progress @@ -825,7 +871,8 @@ def test_max_set_age(started_cluster): node = started_cluster.instances["instance"] table_name = "max_set_age" dst_table_name = f"{table_name}_dst" - keeper_path = f"/clickhouse/test_{table_name}" + # A unique path is necessary for repeatable tests + keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}" files_path = f"{table_name}_data" max_age = 20 files_to_generate = 10 @@ -936,10 +983,9 @@ def test_max_set_age(started_cluster): def test_max_set_size(started_cluster): node = started_cluster.instances["instance"] table_name = f"max_set_size" - dst_table_name = f"{table_name}_dst" - keeper_path = f"/clickhouse/test_{table_name}" + # A unique path is necessary for repeatable tests + keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}" files_path = f"{table_name}_data" - max_age = 10 files_to_generate = 10 create_table( @@ -983,7 +1029,8 @@ def test_drop_table(started_cluster): node = started_cluster.instances["instance"] table_name = f"test_drop" dst_table_name = f"{table_name}_dst" - keeper_path = f"/clickhouse/test_{table_name}" + # A unique path is necessary for repeatable tests + keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}" files_path = f"{table_name}_data" files_to_generate = 300 @@ -1013,9 +1060,11 @@ def test_drop_table(started_cluster): def test_s3_client_reused(started_cluster): node = started_cluster.instances["instance"] - table_name = f"test.test_s3_client_reused" + table_name = f"test_s3_client_reused" dst_table_name = f"{table_name}_dst" files_path = f"{table_name}_data" + # A unique path is necessary for repeatable tests + keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}" row_num = 10 def get_created_s3_clients_count(): @@ -1049,6 +1098,7 @@ def test_s3_client_reused(started_cluster): additional_settings={ "after_processing": "delete", "s3queue_processing_threads_num": 1, + "keeper_path": keeper_path, }, auth=NO_AUTH, bucket=started_cluster.minio_public_bucket, @@ -1106,7 +1156,8 @@ def test_processing_threads(started_cluster, mode): node = started_cluster.instances["instance"] table_name = f"processing_threads_{mode}" dst_table_name = f"{table_name}_dst" - keeper_path = f"/clickhouse/test_{table_name}" + # A unique path is necessary for repeatable tests + keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}" files_path = f"{table_name}_data" files_to_generate = 300 processing_threads = 32 @@ -1173,7 +1224,8 @@ def test_shards(started_cluster, mode, processing_threads): node = started_cluster.instances["instance"] table_name = f"test_shards_{mode}_{processing_threads}" dst_table_name = f"{table_name}_dst" - keeper_path = f"/clickhouse/test_{table_name}" + # A unique path is necessary for repeatable tests + keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}" files_path = f"{table_name}_data" files_to_generate = 300 shards_num = 3 @@ -1292,7 +1344,7 @@ where zookeeper_path ilike '%{table_name}%' and status = 'Processed' and rows_pr pytest.param("unordered", 1), pytest.param("unordered", 8), pytest.param("ordered", 1), - pytest.param("ordered", 8), + pytest.param("ordered", 2), ], ) def test_shards_distributed(started_cluster, mode, processing_threads): @@ -1300,10 +1352,11 @@ def test_shards_distributed(started_cluster, mode, processing_threads): node_2 = started_cluster.instances["instance2"] table_name = f"test_shards_distributed_{mode}_{processing_threads}" dst_table_name = f"{table_name}_dst" - keeper_path = f"/clickhouse/test_{table_name}" + # A unique path is necessary for repeatable tests + keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}" files_path = f"{table_name}_data" files_to_generate = 300 - row_num = 50 + row_num = 300 total_rows = row_num * files_to_generate shards_num = 2 @@ -1453,8 +1506,8 @@ def test_settings_check(started_cluster): node = started_cluster.instances["instance"] node_2 = started_cluster.instances["instance2"] table_name = f"test_settings_check" - dst_table_name = f"{table_name}_dst" - keeper_path = f"/clickhouse/test_{table_name}" + # A unique path is necessary for repeatable tests + keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}" files_path = f"{table_name}_data" mode = "ordered" @@ -1496,7 +1549,10 @@ def test_processed_file_setting(started_cluster, processing_threads): node = started_cluster.instances["instance"] table_name = f"test_processed_file_setting_{processing_threads}" dst_table_name = f"{table_name}_dst" - keeper_path = f"/clickhouse/test_{table_name}_{processing_threads}" + # A unique path is necessary for repeatable tests + keeper_path = ( + f"/clickhouse/test_{table_name}_{processing_threads}_{generate_random_string()}" + ) files_path = f"{table_name}_data" files_to_generate = 10 @@ -1547,7 +1603,10 @@ def test_processed_file_setting_distributed(started_cluster, processing_threads) node_2 = started_cluster.instances["instance2"] table_name = f"test_processed_file_setting_distributed_{processing_threads}" dst_table_name = f"{table_name}_dst" - keeper_path = f"/clickhouse/test_{table_name}" + # A unique path is necessary for repeatable tests + keeper_path = ( + f"/clickhouse/test_{table_name}_{processing_threads}_{generate_random_string()}" + ) files_path = f"{table_name}_data" files_to_generate = 10 @@ -1601,7 +1660,8 @@ def test_upgrade(started_cluster): table_name = f"test_upgrade" dst_table_name = f"{table_name}_dst" - keeper_path = f"/clickhouse/test_{table_name}" + # A unique path is necessary for repeatable tests + keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}" files_path = f"{table_name}_data" files_to_generate = 10 @@ -1640,7 +1700,8 @@ def test_upgrade(started_cluster): def test_exception_during_insert(started_cluster): node = started_cluster.instances["instance_too_many_parts"] - table_name = f"test_exception_during_insert" + # A unique table name is necessary for repeatable tests + table_name = f"test_exception_during_insert_{generate_random_string()}" dst_table_name = f"{table_name}_dst" keeper_path = f"/clickhouse/test_{table_name}" files_path = f"{table_name}_data" @@ -1656,6 +1717,7 @@ def test_exception_during_insert(started_cluster): "keeper_path": keeper_path, }, ) + node.rotate_logs() total_values = generate_random_files( started_cluster, files_path, files_to_generate, start_ind=0, row_num=1 ) @@ -1672,33 +1734,49 @@ def test_exception_during_insert(started_cluster): ) assert "Too many parts" in exception + original_parts_to_throw_insert = 0 + modified_parts_to_throw_insert = 10 node.replace_in_config( "/etc/clickhouse-server/config.d/merge_tree.xml", - "parts_to_throw_insert>0", - "parts_to_throw_insert>10", + f"parts_to_throw_insert>{original_parts_to_throw_insert}", + f"parts_to_throw_insert>{modified_parts_to_throw_insert}", ) - node.restart_clickhouse() + try: + node.restart_clickhouse() - def get_count(): - return int(node.query(f"SELECT count() FROM {dst_table_name}")) + def get_count(): + return int(node.query(f"SELECT count() FROM {dst_table_name}")) - expected_rows = 10 - for _ in range(20): - if expected_rows == get_count(): - break - time.sleep(1) - assert expected_rows == get_count() + expected_rows = 10 + for _ in range(20): + if expected_rows == get_count(): + break + time.sleep(1) + assert expected_rows == get_count() + finally: + node.replace_in_config( + "/etc/clickhouse-server/config.d/merge_tree.xml", + f"parts_to_throw_insert>{modified_parts_to_throw_insert}", + f"parts_to_throw_insert>{original_parts_to_throw_insert}", + ) + node.restart_clickhouse() def test_commit_on_limit(started_cluster): node = started_cluster.instances["instance"] - table_name = f"test_commit_on_limit" + # A unique table name is necessary for repeatable tests + table_name = f"test_commit_on_limit_{generate_random_string()}" dst_table_name = f"{table_name}_dst" keeper_path = f"/clickhouse/test_{table_name}" files_path = f"{table_name}_data" files_to_generate = 10 + failed_files_event_before = int( + node.query( + "SELECT value FROM system.events WHERE name = 'ObjectStorageQueueFailedFiles' SETTINGS system_events_show_zero_values=1" + ) + ) create_table( started_cluster, node, @@ -1774,6 +1852,9 @@ def test_commit_on_limit(started_cluster): assert "test_999999.csv" in get_processed_files() assert 1 == int( + node.count_in_log(f"Setting file {files_path}/test_9999.csv as failed") + ) + assert failed_files_event_before + 1 == int( node.query( "SELECT value FROM system.events WHERE name = 'ObjectStorageQueueFailedFiles' SETTINGS system_events_show_zero_values=1" ) diff --git a/tests/integration/test_system_flush_logs/test.py b/tests/integration/test_system_flush_logs/test.py index 713b327eb76..cfecea5b3d6 100644 --- a/tests/integration/test_system_flush_logs/test.py +++ b/tests/integration/test_system_flush_logs/test.py @@ -4,7 +4,7 @@ import pytest from helpers.cluster import ClickHouseCluster -from helpers.test_tools import assert_eq_with_retry +from helpers.test_tools import assert_eq_with_retry, assert_logs_contain_with_retry, TSV cluster = ClickHouseCluster(__file__) node = cluster.add_instance( @@ -12,17 +12,6 @@ node = cluster.add_instance( stay_alive=True, ) -system_logs = [ - # enabled by default - ("system.text_log", 1), - ("system.query_log", 1), - ("system.query_thread_log", 1), - ("system.part_log", 1), - ("system.trace_log", 1), - ("system.metric_log", 1), - ("system.error_log", 1), -] - @pytest.fixture(scope="module", autouse=True) def start_cluster(): @@ -33,22 +22,28 @@ def start_cluster(): cluster.shutdown() -@pytest.fixture(scope="function") -def flush_logs(): +def test_system_logs_exists(): + system_logs = [ + ("system.text_log", 1), + ("system.query_log", 1), + ("system.query_thread_log", 1), + ("system.part_log", 1), + ("system.trace_log", 1), + ("system.metric_log", 1), + ("system.error_log", 1), + ] + node.query("SYSTEM FLUSH LOGS") - - -@pytest.mark.parametrize("table,exists", system_logs) -def test_system_logs(flush_logs, table, exists): - q = "SELECT * FROM {}".format(table) - if exists: - node.query(q) - else: - response = node.query_and_get_error(q) - assert ( - "Table {} does not exist".format(table) in response - or "Unknown table expression identifier '{}'".format(table) in response - ) + for table, exists in system_logs: + q = "SELECT * FROM {}".format(table) + if exists: + node.query(q) + else: + response = node.query_and_get_error(q) + assert ( + "Table {} does not exist".format(table) in response + or "Unknown table expression identifier '{}'".format(table) in response + ) # Logic is tricky, let's check that there is no hang in case of message queue @@ -67,14 +62,19 @@ def test_system_logs_non_empty_queue(): def test_system_suspend(): - node.query("CREATE TABLE t (x DateTime) ENGINE=Memory;") - node.query("INSERT INTO t VALUES (now());") - node.query("SYSTEM SUSPEND FOR 1 SECOND;") - node.query("INSERT INTO t VALUES (now());") - assert "1\n" == node.query("SELECT max(x) - min(x) >= 1 FROM t;") + try: + node.query("CREATE TABLE t (x DateTime) ENGINE=Memory;") + node.query("INSERT INTO t VALUES (now());") + node.query("SYSTEM SUSPEND FOR 1 SECOND;") + node.query("INSERT INTO t VALUES (now());") + assert "1\n" == node.query("SELECT max(x) - min(x) >= 1 FROM t;") + finally: + node.query("DROP TABLE IF EXISTS t;") def test_log_max_size(start_cluster): + # we do misconfiguration here: buffer_size_rows_flush_threshold > max_size_rows, flush_interval_milliseconds is huge + # no auto flush by size not by time has a chance node.exec_in_container( [ "bash", @@ -83,6 +83,7 @@ def test_log_max_size(start_cluster): 1000000 + 1000000 10 10 @@ -91,11 +92,24 @@ def test_log_max_size(start_cluster): """, ] ) - node.restart_clickhouse() - for i in range(10): - node.query(f"select {i}") - assert node.query("select count() >= 10 from system.query_log") == "1\n" + node.query("SYSTEM FLUSH LOGS") + node.query(f"TRUNCATE TABLE IF EXISTS system.query_log") + node.restart_clickhouse() + + # all logs records above max_size_rows are lost + # The accepted logs records are never flushed until system flush logs is called by us + for i in range(21): + node.query(f"select {i}") + node.query("system flush logs") + + assert_logs_contain_with_retry( + node, "Queue had been full at 0, accepted 10 logs, ignored 34 logs." + ) + assert node.query( + "select count() >= 10, count() < 20 from system.query_log" + ) == TSV([[1, 1]]) + node.exec_in_container( ["rm", f"/etc/clickhouse-server/config.d/yyy-override-query_log.xml"] ) diff --git a/tests/integration/test_system_logs_recreate/test.py b/tests/integration/test_system_logs_recreate/test.py index 1bdb1fe3261..8b84734ed02 100644 --- a/tests/integration/test_system_logs_recreate/test.py +++ b/tests/integration/test_system_logs_recreate/test.py @@ -33,124 +33,139 @@ def test_system_logs_recreate(): "error_log", ] - node.query("SYSTEM FLUSH LOGS") - for table in system_logs: - assert "ENGINE = MergeTree" in node.query(f"SHOW CREATE TABLE system.{table}") - assert "ENGINE = Null" not in node.query(f"SHOW CREATE TABLE system.{table}") - assert ( - len( - node.query(f"SHOW TABLES FROM system LIKE '{table}%'") - .strip() - .split("\n") + try: + node.query("SYSTEM FLUSH LOGS") + for table in system_logs: + assert "ENGINE = MergeTree" in node.query( + f"SHOW CREATE TABLE system.{table}" ) - == 1 - ) - - # NOTE: we use zzz- prefix to make it the last file, - # so that it will be applied last. - for table in system_logs: - node.exec_in_container( - [ - "bash", - "-c", - f"""echo " - - <{table}> - ENGINE = Null - - - - " > /etc/clickhouse-server/config.d/zzz-override-{table}.xml - """, - ] - ) - - node.restart_clickhouse() - node.query("SYSTEM FLUSH LOGS") - for table in system_logs: - assert "ENGINE = MergeTree" not in node.query( - f"SHOW CREATE TABLE system.{table}" - ) - assert "ENGINE = Null" in node.query(f"SHOW CREATE TABLE system.{table}") - assert ( - len( - node.query(f"SHOW TABLES FROM system LIKE '{table}%'") - .strip() - .split("\n") + assert "ENGINE = Null" not in node.query( + f"SHOW CREATE TABLE system.{table}" ) - == 2 - ) - - # apply only storage_policy for all system tables - for table in system_logs: - node.exec_in_container( - [ - "bash", - "-c", - f"""echo " - - <{table}> - system_tables - - - " > /etc/clickhouse-server/config.d/zzz-override-{table}.xml - """, - ] - ) - node.restart_clickhouse() - node.query("SYSTEM FLUSH LOGS") - import logging - - for table in system_logs: - create_table_sql = node.query(f"SHOW CREATE TABLE system.{table} FORMAT TSVRaw") - logging.debug( - "With storage policy, SHOW CREATE TABLE system.%s is: %s", - table, - create_table_sql, - ) - assert "ENGINE = MergeTree" in create_table_sql - assert "ENGINE = Null" not in create_table_sql - assert "SETTINGS storage_policy = 'system_tables'" in create_table_sql - assert ( - len( - node.query(f"SHOW TABLES FROM system LIKE '{table}%'") - .strip() - .split("\n") + assert ( + len( + node.query(f"SHOW TABLES FROM system LIKE '{table}%'") + .strip() + .split("\n") + ) + == 1 ) - == 3 - ) - for table in system_logs: - node.exec_in_container( - ["rm", f"/etc/clickhouse-server/config.d/zzz-override-{table}.xml"] - ) - - node.restart_clickhouse() - node.query("SYSTEM FLUSH LOGS") - for table in system_logs: - assert "ENGINE = MergeTree" in node.query(f"SHOW CREATE TABLE system.{table}") - assert "ENGINE = Null" not in node.query(f"SHOW CREATE TABLE system.{table}") - assert ( - len( - node.query(f"SHOW TABLES FROM system LIKE '{table}%'") - .strip() - .split("\n") + # NOTE: we use zzz- prefix to make it the last file, + # so that it will be applied last. + for table in system_logs: + node.exec_in_container( + [ + "bash", + "-c", + f"""echo " + + <{table}> + ENGINE = Null + + + + " > /etc/clickhouse-server/config.d/zzz-override-{table}.xml + """, + ] ) - == 4 - ) - node.query("SYSTEM FLUSH LOGS") - # Ensure that there was no superfluous RENAME's - # IOW that the table created only when the structure is indeed different. - for table in system_logs: - assert ( - len( - node.query(f"SHOW TABLES FROM system LIKE '{table}%'") - .strip() - .split("\n") + node.restart_clickhouse() + node.query("SYSTEM FLUSH LOGS") + for table in system_logs: + assert "ENGINE = MergeTree" not in node.query( + f"SHOW CREATE TABLE system.{table}" ) - == 4 - ) + assert "ENGINE = Null" in node.query(f"SHOW CREATE TABLE system.{table}") + assert ( + len( + node.query(f"SHOW TABLES FROM system LIKE '{table}%'") + .strip() + .split("\n") + ) + == 2 + ) + + # apply only storage_policy for all system tables + for table in system_logs: + node.exec_in_container( + [ + "bash", + "-c", + f"""echo " + + <{table}> + system_tables + + + " > /etc/clickhouse-server/config.d/zzz-override-{table}.xml + """, + ] + ) + node.restart_clickhouse() + node.query("SYSTEM FLUSH LOGS") + import logging + + for table in system_logs: + create_table_sql = node.query( + f"SHOW CREATE TABLE system.{table} FORMAT TSVRaw" + ) + logging.debug( + "With storage policy, SHOW CREATE TABLE system.%s is: %s", + table, + create_table_sql, + ) + assert "ENGINE = MergeTree" in create_table_sql + assert "ENGINE = Null" not in create_table_sql + assert "SETTINGS storage_policy = 'system_tables'" in create_table_sql + assert ( + len( + node.query(f"SHOW TABLES FROM system LIKE '{table}%'") + .strip() + .split("\n") + ) + == 3 + ) + + for table in system_logs: + node.exec_in_container( + ["rm", f"/etc/clickhouse-server/config.d/zzz-override-{table}.xml"] + ) + + node.restart_clickhouse() + node.query("SYSTEM FLUSH LOGS") + for table in system_logs: + assert "ENGINE = MergeTree" in node.query( + f"SHOW CREATE TABLE system.{table}" + ) + assert "ENGINE = Null" not in node.query( + f"SHOW CREATE TABLE system.{table}" + ) + assert ( + len( + node.query(f"SHOW TABLES FROM system LIKE '{table}%'") + .strip() + .split("\n") + ) + == 4 + ) + + node.query("SYSTEM FLUSH LOGS") + # Ensure that there was no superfluous RENAME's + # IOW that the table created only when the structure is indeed different. + for table in system_logs: + assert ( + len( + node.query(f"SHOW TABLES FROM system LIKE '{table}%'") + .strip() + .split("\n") + ) + == 4 + ) + finally: + for table in system_logs: + for syffix in range(3): + node.query(f"DROP TABLE IF EXISTS system.{table}_{syffix} sync") def test_drop_system_log(): @@ -173,11 +188,20 @@ def test_drop_system_log(): node.query("system flush logs") node.query("select 2") node.query("system flush logs") - assert node.query("select count() > 0 from system.query_log") == "1\n" + assert node.query("select count() >= 2 from system.query_log") == "1\n" + node.query("drop table system.query_log sync") node.query("select 3") node.query("system flush logs") - assert node.query("select count() > 0 from system.query_log") == "1\n" + assert node.query("select count() >= 1 from system.query_log") == "1\n" + + node.query("drop table system.query_log sync") + node.restart_clickhouse() + node.query("system flush logs") + assert ( + node.query("select count() >= 0 from system.query_log") == "1\n" + ) # we check that query_log just exists + node.exec_in_container( ["rm", f"/etc/clickhouse-server/config.d/yyy-override-query_log.xml"] ) diff --git a/tests/integration/test_throttling/test.py b/tests/integration/test_throttling/test.py index c53c2bb1ddf..4bd96e2756d 100644 --- a/tests/integration/test_throttling/test.py +++ b/tests/integration/test_throttling/test.py @@ -121,21 +121,15 @@ def node_update_config(mode, setting, value=None): node.restart_clickhouse() -def assert_took(took, should_took): +def assert_took(took, should_take): # we need to decrease the lower limit because the server limits could # be enforced by throttling some server background IO instead of query IO # and we have no control over it - # - # and the same for upper limit, it can be slightly larger, due to for - # instance network latencies or CPU starvation - if should_took > 0: - assert took >= should_took * 0.85 and took <= should_took * 1.8 - else: - assert took >= should_took * 0.85 + assert took >= should_take * 0.85 @pytest.mark.parametrize( - "policy,backup_name,mode,setting,value,should_took", + "policy,backup_name,mode,setting,value,should_take", [ # # Local -> Local @@ -149,7 +143,7 @@ def assert_took(took, should_took): 0, id="no_local_throttling", ), - # reading 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds + # reading 1e6*8 bytes with 1M default bandwidth should take (8-1)/1=7 seconds pytest.param( "default", next_backup_name("local"), @@ -159,7 +153,7 @@ def assert_took(took, should_took): 7, id="user_local_throttling", ), - # reading 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds + # reading 1e6*8 bytes with 2M default bandwidth should take (8-2)/2=3 seconds pytest.param( "default", next_backup_name("local"), @@ -181,7 +175,7 @@ def assert_took(took, should_took): 0, id="no_remote_to_local_throttling", ), - # reading 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds + # reading 1e6*8 bytes with 1M default bandwidth should take (8-1)/1=7 seconds pytest.param( "s3", next_backup_name("local"), @@ -191,7 +185,7 @@ def assert_took(took, should_took): 7, id="user_remote_to_local_throttling", ), - # reading 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds + # reading 1e6*8 bytes with 2M default bandwidth should take (8-2)/2=3 seconds pytest.param( "s3", next_backup_name("local"), @@ -252,7 +246,7 @@ def assert_took(took, should_took): 0, id="no_local_to_remote_throttling", ), - # reading 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds + # reading 1e6*8 bytes with 1M default bandwidth should take (8-1)/1=7 seconds pytest.param( "default", next_backup_name("remote"), @@ -262,7 +256,7 @@ def assert_took(took, should_took): 7, id="user_local_to_remote_throttling", ), - # reading 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds + # reading 1e6*8 bytes with 2M default bandwidth should take (8-2)/2=3 seconds pytest.param( "default", next_backup_name("remote"), @@ -274,7 +268,7 @@ def assert_took(took, should_took): ), ], ) -def test_backup_throttling(policy, backup_name, mode, setting, value, should_took): +def test_backup_throttling(policy, backup_name, mode, setting, value, should_take): node_update_config(mode, setting, value) node.query( f""" @@ -284,7 +278,7 @@ def test_backup_throttling(policy, backup_name, mode, setting, value, should_too """ ) _, took = elapsed(node.query, f"backup table data to {backup_name}") - assert_took(took, should_took) + assert_took(took, should_take) def test_backup_throttling_override(): @@ -305,18 +299,18 @@ def test_backup_throttling_override(): "max_backup_bandwidth": "500K", }, ) - # reading 1e6*8 bytes with 500Ki default bandwith should take (8-0.5)/0.5=15 seconds + # reading 1e6*8 bytes with 500Ki default bandwidth should take (8-0.5)/0.5=15 seconds assert_took(took, 15) @pytest.mark.parametrize( - "policy,mode,setting,value,should_took", + "policy,mode,setting,value,should_take", [ # # Local # pytest.param("default", None, None, None, 0, id="no_local_throttling"), - # reading 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds + # reading 1e6*8 bytes with 1M default bandwidth should take (8-1)/1=7 seconds pytest.param( "default", "user", @@ -325,7 +319,7 @@ def test_backup_throttling_override(): 7, id="user_local_throttling", ), - # reading 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds + # reading 1e6*8 bytes with 2M default bandwidth should take (8-2)/2=3 seconds pytest.param( "default", "server", @@ -338,7 +332,7 @@ def test_backup_throttling_override(): # Remote # pytest.param("s3", None, None, None, 0, id="no_remote_throttling"), - # reading 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds + # reading 1e6*8 bytes with 1M default bandwidth should take (8-1)/1=7 seconds pytest.param( "s3", "user", @@ -347,7 +341,7 @@ def test_backup_throttling_override(): 7, id="user_remote_throttling", ), - # reading 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds + # reading 1e6*8 bytes with 2M default bandwidth should take (8-2)/2=3 seconds pytest.param( "s3", "server", @@ -358,7 +352,7 @@ def test_backup_throttling_override(): ), ], ) -def test_read_throttling(policy, mode, setting, value, should_took): +def test_read_throttling(policy, mode, setting, value, should_take): node_update_config(mode, setting, value) node.query( f""" @@ -368,17 +362,17 @@ def test_read_throttling(policy, mode, setting, value, should_took): """ ) _, took = elapsed(node.query, f"select * from data") - assert_took(took, should_took) + assert_took(took, should_take) @pytest.mark.parametrize( - "policy,mode,setting,value,should_took", + "policy,mode,setting,value,should_take", [ # # Local # pytest.param("default", None, None, None, 0, id="no_local_throttling"), - # reading 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds + # reading 1e6*8 bytes with 1M default bandwidth should take (8-1)/1=7 seconds pytest.param( "default", "user", @@ -387,7 +381,7 @@ def test_read_throttling(policy, mode, setting, value, should_took): 7, id="local_user_throttling", ), - # reading 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds + # reading 1e6*8 bytes with 2M default bandwidth should take (8-2)/2=3 seconds pytest.param( "default", "server", @@ -400,7 +394,7 @@ def test_read_throttling(policy, mode, setting, value, should_took): # Remote # pytest.param("s3", None, None, None, 0, id="no_remote_throttling"), - # writing 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds + # writing 1e6*8 bytes with 1M default bandwidth should take (8-1)/1=7 seconds pytest.param( "s3", "user", @@ -409,7 +403,7 @@ def test_read_throttling(policy, mode, setting, value, should_took): 7, id="user_remote_throttling", ), - # writing 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds + # writing 1e6*8 bytes with 2M default bandwidth should take (8-2)/2=3 seconds pytest.param( "s3", "server", @@ -420,7 +414,7 @@ def test_read_throttling(policy, mode, setting, value, should_took): ), ], ) -def test_write_throttling(policy, mode, setting, value, should_took): +def test_write_throttling(policy, mode, setting, value, should_take): node_update_config(mode, setting, value) node.query( f""" @@ -429,7 +423,7 @@ def test_write_throttling(policy, mode, setting, value, should_took): """ ) _, took = elapsed(node.query, f"insert into data select * from numbers(1e6)") - assert_took(took, should_took) + assert_took(took, should_take) def test_max_mutations_bandwidth_for_server(): @@ -444,7 +438,7 @@ def test_max_mutations_bandwidth_for_server(): node.query, "alter table data update key = -key where 1 settings mutations_sync = 1", ) - # reading 1e6*8 bytes with 1M/s bandwith should take (8-1)/1=7 seconds + # reading 1e6*8 bytes with 1M/s bandwidth should take (8-1)/1=7 seconds assert_took(took, 7) @@ -457,5 +451,5 @@ def test_max_merges_bandwidth_for_server(): ) node.query("insert into data select * from numbers(1e6)") _, took = elapsed(node.query, "optimize table data final") - # reading 1e6*8 bytes with 1M/s bandwith should take (8-1)/1=7 seconds + # reading 1e6*8 bytes with 1M/s bandwidth should take (8-1)/1=7 seconds assert_took(took, 7) diff --git a/tests/integration/test_version_update/configs/log_conf.xml b/tests/integration/test_version_update/configs/log_conf.xml index f9d15e572aa..27c7107ce5e 100644 --- a/tests/integration/test_version_update/configs/log_conf.xml +++ b/tests/integration/test_version_update/configs/log_conf.xml @@ -1,4 +1,4 @@ - + trace /var/log/clickhouse-server/log.log @@ -8,4 +8,4 @@ /var/log/clickhouse-server/stderr.log /var/log/clickhouse-server/stdout.log - + diff --git a/tests/performance/all_join_opt.xml b/tests/performance/all_join_opt.xml new file mode 100644 index 00000000000..0ab9c39f67c --- /dev/null +++ b/tests/performance/all_join_opt.xml @@ -0,0 +1,15 @@ + + CREATE TABLE test (a Int64, b String, c LowCardinality(String)) ENGINE = MergeTree() ORDER BY a + CREATE TABLE test1 (a Int64, b String, c LowCardinality(String)) ENGINE = MergeTree() ORDER BY a + + INSERT INTO test SELECT number % 10000, number % 10000, number % 10000 FROM numbers(10000000) + INSERT INTO test1 SELECT number % 1000 , number % 1000, number % 1000 FROM numbers(100000) + + SELECT MAX(test1.a) FROM test INNER JOIN test1 on test.b = test1.b + SELECT MAX(test1.a) FROM test LEFT JOIN test1 on test.b = test1.b + SELECT MAX(test1.a) FROM test RIGHT JOIN test1 on test.b = test1.b + SELECT MAX(test1.a) FROM test FULL JOIN test1 on test.b = test1.b + + DROP TABLE IF EXISTS test + DROP TABLE IF EXISTS test1 + \ No newline at end of file diff --git a/tests/performance/insert_select_squashing.xml b/tests/performance/insert_select_squashing.xml new file mode 100644 index 00000000000..4c2c88f3d22 --- /dev/null +++ b/tests/performance/insert_select_squashing.xml @@ -0,0 +1,23 @@ + + + 1000 + + + +CREATE TABLE squash_performance +( + s1 String, + s2 Nullable(String), + a1 Array(Array(String)), + a2 Array(Array(UInt32)), + m1 Map(String, Array(String)), + m2 Map(String, Array(UInt64)), + t Tuple(String, Array(String), Map(String, String)) +) +ENGINE = Null; + + + INSERT INTO squash_performance SELECT * FROM generateRandom(42) LIMIT 500000 + + DROP TABLE IF EXISTS squash_performance + diff --git a/tests/performance/insert_select_squashing_dynamic.xml b/tests/performance/insert_select_squashing_dynamic.xml new file mode 100644 index 00000000000..f7f600fd8bd --- /dev/null +++ b/tests/performance/insert_select_squashing_dynamic.xml @@ -0,0 +1,59 @@ + + + 1000 + 0 + 1 + + + +CREATE TABLE dynamic_squash_performance_1 +( + d Dynamic +) +ENGINE = Null; + + + +CREATE TABLE dynamic_squash_performance_2 +( + d Dynamic(max_types=6) +) +ENGINE = Null; + + + +CREATE TABLE src_dynamic_squash_performance_1 +( + d Dynamic +) +ENGINE = Memory; + + + +CREATE TABLE src_dynamic_squash_performance_2 +( + d Dynamic(max_types=6) +) +ENGINE = Memory; + + + + + + + + + + + + INSERT INTO dynamic_squash_performance_1 SELECT number::Dynamic FROM numbers(10000000) + INSERT INTO dynamic_squash_performance_1 SELECT range(number % 100)::Dynamic FROM numbers(2000000) + INSERT INTO dynamic_squash_performance_1 SELECT * FROM src_dynamic_squash_performance_1 + INSERT INTO dynamic_squash_performance_2 SELECT * FROM src_dynamic_squash_performance_2 + + DROP TABLE IF EXISTS dynamic_squash_performance_1 + DROP TABLE IF EXISTS dynamic_squash_performance_2 + DROP TABLE IF EXISTS src_dynamic_squash_performance_1 + DROP TABLE IF EXISTS src_dynamic_squash_performance_2 + + diff --git a/tests/performance/insert_select_squashing_variant.xml b/tests/performance/insert_select_squashing_variant.xml new file mode 100644 index 00000000000..5c59fc7b50f --- /dev/null +++ b/tests/performance/insert_select_squashing_variant.xml @@ -0,0 +1,34 @@ + + + 1000 + 0 + 1 + 1 + + + +CREATE TABLE variant_squash_performance +( + v Variant(Tuple(v1 Array(UInt64)), Tuple(v2 Array(UInt64)), Tuple(v3 Array(UInt64)), Tuple(v4 Array(UInt64)), Tuple(v5 Array(UInt64))) +) +ENGINE = Null; + + + +CREATE TABLE src_variant_squash_performance +( + v Variant(Tuple(v1 Array(UInt64)), Tuple(v2 Array(UInt64)), Tuple(v3 Array(UInt64)), Tuple(v4 Array(UInt64)), Tuple(v5 Array(UInt64))) +) +ENGINE = Memory; + + + + + + + INSERT INTO variant_squash_performance SELECT * FROM src_variant_squash_performance + + DROP TABLE IF EXISTS variant_squash_performance + DROP TABLE IF EXISTS src_variant_squash_performance + + diff --git a/tests/performance/json_type.xml b/tests/performance/json_type.xml index b6406f52579..db3fd844f89 100644 --- a/tests/performance/json_type.xml +++ b/tests/performance/json_type.xml @@ -27,9 +27,9 @@ - CREATE TABLE t_json_1(data JSON) ENGINE = MergeTree ORDER BY tuple() - CREATE TABLE t_json_2(data JSON) ENGINE = MergeTree ORDER BY tuple() - CREATE TABLE t_json_3(data JSON) ENGINE = MergeTree ORDER BY tuple() + CREATE TABLE t_json_1(data Object('json')) ENGINE = MergeTree ORDER BY tuple() + CREATE TABLE t_json_2(data Object('json')) ENGINE = MergeTree ORDER BY tuple() + CREATE TABLE t_json_3(data Object('json')) ENGINE = MergeTree ORDER BY tuple() INSERT INTO t_json_1 SELECT materialize({json1}) FROM numbers(200000) INSERT INTO t_json_2 SELECT {json2} FROM numbers(100000) diff --git a/tests/performance/materialized_view_deduplication.xml b/tests/performance/materialized_view_deduplication.xml new file mode 100644 index 00000000000..e5e0e5fc6e4 --- /dev/null +++ b/tests/performance/materialized_view_deduplication.xml @@ -0,0 +1,33 @@ + + + 1 + + + CREATE TABLE dst (`key` Int64, `value` String) + ENGINE = MergeTree ORDER BY tuple() + SETTINGS non_replicated_deduplication_window=1000; + + + CREATE TABLE mv_dst (`key` Int64, `value` String) + ENGINE = MergeTree ORDER BY tuple() + SETTINGS non_replicated_deduplication_window=1000; + + + CREATE MATERIALIZED VIEW mv_first TO mv_dst + AS SELECT 0 AS key, value AS value FROM dst; + + + CREATE MATERIALIZED VIEW mv_second TO mv_dst + AS SELECT 0 AS key, value AS value FROM dst; + + INSERT INTO dst SELECT number as key, toString(number) from numbers(1000); + + + INSERT INTO dst SELECT number as key, toString(number) from numbers(1000); + + + DROP TABLE IF EXISTS dst + DROP TABLE IF EXISTS mv_dst + DROP TABLE IF EXISTS mv_first + DROP TABLE IF EXISTS mv_second + diff --git a/tests/performance/new_json_type.xml b/tests/performance/new_json_type.xml new file mode 100644 index 00000000000..1ad21850c6c --- /dev/null +++ b/tests/performance/new_json_type.xml @@ -0,0 +1,41 @@ + + + 1 + + + + + + + json1 + + '{"k1":1, "k2": "some"}' + + + + json2 + + '{"col' || toString(number % 100) || '":' || toString(number) || '}' + + + + json3 + + '{"k1":[{"k2":"aaa","k3":[{"k4":"bbb"},{"k4":"ccc"}]},{"k2":"ddd","k3":[{"k4":"eee"},{"k4":"fff"}]}]}' + + + + + CREATE TABLE t_json_1(data JSON) ENGINE = MergeTree ORDER BY tuple() + CREATE TABLE t_json_2(data JSON) ENGINE = MergeTree ORDER BY tuple() + CREATE TABLE t_json_3(data JSON) ENGINE = MergeTree ORDER BY tuple() + + INSERT INTO t_json_1 SELECT materialize({json1}) FROM numbers(200000) + INSERT INTO t_json_2 SELECT {json2} FROM numbers(100000) + INSERT INTO t_json_3 SELECT materialize({json3}) FROM numbers_mt(100000) + + DROP TABLE IF EXISTS t_json_1 + DROP TABLE IF EXISTS t_json_2 + DROP TABLE IF EXISTS t_json_3 + diff --git a/tests/performance/optimize_functions_to_subcolumns.xml b/tests/performance/optimize_functions_to_subcolumns.xml new file mode 100644 index 00000000000..146af1605c4 --- /dev/null +++ b/tests/performance/optimize_functions_to_subcolumns.xml @@ -0,0 +1,26 @@ + + + 4 + + + + CREATE TABLE t_subcolumns (a Array(UInt64), s Nullable(String), m Map(String, UInt64)) ENGINE = MergeTree ORDER BY tuple() + + + + INSERT INTO t_subcolumns SELECT range(number % 20), toString(number), mapFromArrays(range(number % 20), range(number % 20)) FROM numbers_mt(50000000) + + + + OPTIMIZE TABLE t_subcolumns FINAL + + + SELECT count() FROM t_subcolumns WHERE NOT ignore(length(a)) + SELECT count() FROM t_subcolumns WHERE notEmpty(a) + SELECT count() FROM t_subcolumns WHERE NOT ignore(length(m)) + SELECT count() FROM t_subcolumns WHERE notEmpty(m) + SELECT count() FROM t_subcolumns WHERE isNotNull(s) + SELECT count(s) FROM t_subcolumns + + DROP TABLE t_subcolumns + diff --git a/tests/performance/parallel_mv.xml b/tests/performance/parallel_mv.xml index 5b856740a19..0bf5ed1be09 100644 --- a/tests/performance/parallel_mv.xml +++ b/tests/performance/parallel_mv.xml @@ -11,13 +11,13 @@ create table mt_4 (n UInt64, s String) engine = MergeTree order by tuple() create materialized view mv_1 to mt_1 as - select number, toString(number) from main_table where number % 13 != 0 + select number as n, toString(number) as s from main_table where number % 13 != 0 create materialized view mv_2 to mt_2 as - select number, toString(number) from main_table where number % 13 != 1 + select number as n, toString(number) as s from main_table where number % 13 != 1 create materialized view mv_3 to mt_3 as - select number, toString(number) from main_table where number % 13 != 3 + select number as n, toString(number) as s from main_table where number % 13 != 3 create materialized view mv_4 to mt_4 as - select number, toString(number) from main_table where number % 13 != 4 + select number as n, toString(number) as s from main_table where number % 13 != 4 SYSTEM STOP MERGES main_table SYSTEM STOP MERGES mt_1 diff --git a/tests/performance/parquet_read_with_index.xml b/tests/performance/parquet_read_with_index.xml new file mode 100644 index 00000000000..1bb2d8eb4a2 --- /dev/null +++ b/tests/performance/parquet_read_with_index.xml @@ -0,0 +1,30 @@ + + + INSERT INTO FUNCTION file('test_pq_index', Parquet) SELECT * FROM generateRandom('int64_column Nullable(Int64), tuple_column Tuple(a Nullable(String), b Nullable(Float64), c Tuple(i UInt32, j UInt32)),array_tuple_column Array(Tuple(a Nullable(String), b Nullable(Float64), c Nullable(Int64))), map_tuple_column Map(String, Tuple(a Nullable(String), b Nullable(Float64), c Nullable(Int64)))') limit 1000000 SETTINGS output_format_parquet_use_custom_encoder=false, output_format_parquet_write_page_index=true + + + + SELECT * FROM file('test_pq_index', Parquet, 'tuple_column Tuple(a Nullable(String))') Format Null + + + + SELECT tuple_column.a FROM file('test_pq_index', Parquet) Format Null + + + + SELECT tuple_column.a FROM file('test_pq_index', Parquet, 'tuple_column Tuple(a Nullable(String))') Format Null + + + + SELECT tuple_column.c.i FROM file('test_pq_index', Parquet) Format Null + + + + SELECT * FROM file('test_pq_index', Parquet, 'array_tuple_column Array (Tuple(a Nullable(String)))') Format Null + + + + SELECT * FROM file('test_pq_index', Parquet, 'map_tuple_column Map(String, Tuple(a Nullable(String)))') Format Null + + + diff --git a/tests/performance/storage_join_direct_join.xml b/tests/performance/storage_join_direct_join.xml index 867108ac2b7..0e67abb275e 100644 --- a/tests/performance/storage_join_direct_join.xml +++ b/tests/performance/storage_join_direct_join.xml @@ -16,4 +16,7 @@ SELECT keys.key, value1 FROM keys ANY LEFT JOIN dict AS d ON (keys.key = d.key) FORMAT Null; SELECT keys.key, value1 FROM keys ANY LEFT JOIN dict AS d ON (keys.key = d.key) FORMAT Null SETTINGS allow_experimental_analyzer=1 + + DROP TABLE IF EXISTS keys + DROP TABLE IF EXISTS dict diff --git a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql index 53321afc94c..0f13217c236 100644 --- a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql +++ b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql @@ -9,6 +9,8 @@ system flush logs; drop table if exists logs; create view logs as select * from system.text_log where now() - toIntervalMinute(120) < event_time; +SET max_rows_to_read = 0; + -- Check that we don't have too many messages formatted with fmt::runtime or strings concatenation. -- 0.001 threshold should be always enough, the value was about 0.00025 WITH 0.001 AS threshold diff --git a/tests/queries/0_stateless/00086_concat_nary_const_with_nonconst_segfault.sql b/tests/queries/0_stateless/00086_concat_nary_const_with_nonconst_segfault.sql index 2f0ef648983..4b87b2af28d 100644 --- a/tests/queries/0_stateless/00086_concat_nary_const_with_nonconst_segfault.sql +++ b/tests/queries/0_stateless/00086_concat_nary_const_with_nonconst_segfault.sql @@ -1 +1 @@ -SELECT extract(toString(number), '10000000') FROM system.numbers_mt WHERE concat(materialize('1'), '...', toString(number)) LIKE '%10000000%' LIMIT 1 +SELECT extract(toString(number), '10000000') FROM system.numbers_mt WHERE concat(materialize('1'), '...', toString(number)) LIKE '%10000000%' LIMIT 1 SETTINGS max_rows_to_read = 0; diff --git a/tests/queries/0_stateless/00111_shard_external_sort_distributed.reference b/tests/queries/0_stateless/00111_shard_external_sort_distributed.reference index df5aa77af60..7534c12a0d8 100644 --- a/tests/queries/0_stateless/00111_shard_external_sort_distributed.reference +++ b/tests/queries/0_stateless/00111_shard_external_sort_distributed.reference @@ -1,20 +1,20 @@ -7040546 -7040546 -4327029 -4327029 -1613512 -1613512 -8947307 -8947307 -6233790 -6233790 -3520273 -3520273 -806756 -806756 -8140551 -8140551 -5427034 -5427034 -2713517 -2713517 +4437158 +4437158 +1723641 +1723641 +3630402 +3630402 +916885 +916885 +2823646 +2823646 +110129 +110129 +4730407 +4730407 +2016890 +2016890 +3923651 +3923651 +1210134 +1210134 diff --git a/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql b/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql index 112f5edae36..9e06654195d 100644 --- a/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql +++ b/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql @@ -1,11 +1,12 @@ --- Tags: distributed +-- Tags: distributed, long, no-flaky-check +-- ^ no-flaky-check - sometimes longer than 600s with ThreadFuzzer. -SET max_memory_usage = 300000000; -SET max_bytes_before_external_sort = 20000000; +SET max_memory_usage = 150000000; +SET max_bytes_before_external_sort = 10000000; DROP TABLE IF EXISTS numbers10m; -CREATE VIEW numbers10m AS SELECT number FROM system.numbers LIMIT 10000000; +CREATE VIEW numbers10m AS SELECT number FROM system.numbers LIMIT 5000000; -SELECT number FROM remote('127.0.0.{2,3}', currentDatabase(), numbers10m) ORDER BY number * 1234567890123456789 LIMIT 19999980, 20; +SELECT number FROM remote('127.0.0.{2,3}', currentDatabase(), numbers10m) ORDER BY number * 1234567890123456789 LIMIT 4999980, 20; DROP TABLE numbers10m; diff --git a/tests/queries/0_stateless/00115_shard_in_incomplete_result.sh b/tests/queries/0_stateless/00115_shard_in_incomplete_result.sh index 5c3918dea9f..4916721764c 100755 --- a/tests/queries/0_stateless/00115_shard_in_incomplete_result.sh +++ b/tests/queries/0_stateless/00115_shard_in_incomplete_result.sh @@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) set -o errexit set -o pipefail -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" DROP TABLE IF EXISTS users; CREATE TABLE users (UserID UInt64) ENGINE = Log; INSERT INTO users VALUES (1468013291393583084); diff --git a/tests/queries/0_stateless/00170_lower_upper_utf8.reference b/tests/queries/0_stateless/00170_lower_upper_utf8.reference index f202cb75513..b1cb9ad5b57 100644 --- a/tests/queries/0_stateless/00170_lower_upper_utf8.reference +++ b/tests/queries/0_stateless/00170_lower_upper_utf8.reference @@ -22,3 +22,8 @@ 1 1 1 +1 +1 +1 +1 +2 diff --git a/tests/queries/0_stateless/00170_lower_upper_utf8.sql b/tests/queries/0_stateless/00170_lower_upper_utf8.sql index 4caba2033ff..7c7bbac0df3 100644 --- a/tests/queries/0_stateless/00170_lower_upper_utf8.sql +++ b/tests/queries/0_stateless/00170_lower_upper_utf8.sql @@ -1,3 +1,6 @@ +-- Tags: no-fasttest +-- no-fasttest: upper/lowerUTF8 use ICU + select lower('aaaaaaaaaaaaaaa012345789,.!aaaa' as str) = str; select lowerUTF8('aaaaaaaaaaaaaaa012345789,.!aaaa' as str) = str; select lower('AaAaAaAaAaAaAaA012345789,.!aAaA') = 'aaaaaaaaaaaaaaa012345789,.!aaaa'; @@ -27,3 +30,14 @@ select sum(lower(materialize('aaaaАБВГAAAAaaAA')) = materialize('aaaaАБВ select sum(upper(materialize('aaaaАБВГAAAAaaAA')) = materialize('AAAAАБВГAAAAAAAA')) = count() from system.one array join range(16384) as n; select sum(lowerUTF8(materialize('aaaaАБВГAAAAaaAA')) = materialize('aaaaабвгaaaaaaaa')) = count() from system.one array join range(16384) as n; select sum(upperUTF8(materialize('aaaaАБВГAAAAaaAA')) = materialize('AAAAАБВГAAAAAAAA')) = count() from system.one array join range(16384) as n; + +-- Turkish language +select upperUTF8('ır') = 'IR'; +select lowerUTF8('ır') = 'ır'; + +-- German language +select upper('öäüß') = 'öäüß'; +select lower('ÖÄÜẞ') = 'ÖÄÜẞ'; + +-- Bug 68680 +SELECT lengthUTF8(lowerUTF8('Ä\0')); diff --git a/tests/queries/0_stateless/00233_position_function_family.sql b/tests/queries/0_stateless/00233_position_function_family.sql index dd7394bc39a..d6668cb7ba4 100644 --- a/tests/queries/0_stateless/00233_position_function_family.sql +++ b/tests/queries/0_stateless/00233_position_function_family.sql @@ -1,3 +1,6 @@ +-- Tags: no-fasttest +-- no-fasttest: upper/lowerUTF8 use ICU + SET send_logs_level = 'fatal'; select 1 = position('', ''); diff --git a/tests/queries/0_stateless/00284_external_aggregation.reference b/tests/queries/0_stateless/00284_external_aggregation.reference index be0db217a97..48e30e781e0 100644 --- a/tests/queries/0_stateless/00284_external_aggregation.reference +++ b/tests/queries/0_stateless/00284_external_aggregation.reference @@ -1,22 +1,2 @@ 49999995000000 10000000 499999500000 1000000 15 -100033 2 -100034 2 -100035 2 -100036 2 -100037 2 -100038 2 -100039 2 -10004 2 -100040 2 -100041 2 -100033 2 -100034 2 -100035 2 -100036 2 -100037 2 -100038 2 -100039 2 -10004 2 -100040 2 -100041 2 diff --git a/tests/queries/0_stateless/00284_external_aggregation.sql b/tests/queries/0_stateless/00284_external_aggregation.sql index c1140faaa28..cdc31ff68c8 100644 --- a/tests/queries/0_stateless/00284_external_aggregation.sql +++ b/tests/queries/0_stateless/00284_external_aggregation.sql @@ -1,5 +1,8 @@ -- Tags: long +-- This test was split in two due to long runtimes in sanitizers. +-- The other part is 00284_external_aggregation_2. + SET max_bytes_before_external_group_by = 100000000; SET max_memory_usage = 410000000; SET group_by_two_level_threshold = 100000; @@ -7,19 +10,3 @@ SET group_by_two_level_threshold_bytes = 50000000; SELECT sum(k), sum(c) FROM (SELECT number AS k, count() AS c FROM (SELECT * FROM system.numbers LIMIT 10000000) GROUP BY k); SELECT sum(k), sum(c), max(u) FROM (SELECT number AS k, count() AS c, uniqArray(range(number % 16)) AS u FROM (SELECT * FROM system.numbers LIMIT 1000000) GROUP BY k); - -SET max_memory_usage = 0; -SET group_by_two_level_threshold = 100000; -SET max_bytes_before_external_group_by = '1Mi'; - --- method: key_string & key_string_two_level -CREATE TABLE t_00284_str(s String) ENGINE = MergeTree() ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; -INSERT INTO t_00284_str SELECT toString(number) FROM numbers_mt(1e6); -INSERT INTO t_00284_str SELECT toString(number) FROM numbers_mt(1e6); -SELECT s, count() FROM t_00284_str GROUP BY s ORDER BY s LIMIT 10 OFFSET 42; - --- method: low_cardinality_key_string & low_cardinality_key_string_two_level -CREATE TABLE t_00284_lc_str(s LowCardinality(String)) ENGINE = MergeTree() ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; -INSERT INTO t_00284_lc_str SELECT toString(number) FROM numbers_mt(1e6); -INSERT INTO t_00284_lc_str SELECT toString(number) FROM numbers_mt(1e6); -SELECT s, count() FROM t_00284_lc_str GROUP BY s ORDER BY s LIMIT 10 OFFSET 42; diff --git a/tests/queries/0_stateless/00284_external_aggregation_2.reference b/tests/queries/0_stateless/00284_external_aggregation_2.reference new file mode 100644 index 00000000000..71d2e96d4b0 --- /dev/null +++ b/tests/queries/0_stateless/00284_external_aggregation_2.reference @@ -0,0 +1,20 @@ +100033 2 +100034 2 +100035 2 +100036 2 +100037 2 +100038 2 +100039 2 +10004 2 +100040 2 +100041 2 +100033 2 +100034 2 +100035 2 +100036 2 +100037 2 +100038 2 +100039 2 +10004 2 +100040 2 +100041 2 diff --git a/tests/queries/0_stateless/00284_external_aggregation_2.sql b/tests/queries/0_stateless/00284_external_aggregation_2.sql new file mode 100644 index 00000000000..7960e3894d0 --- /dev/null +++ b/tests/queries/0_stateless/00284_external_aggregation_2.sql @@ -0,0 +1,22 @@ +-- Tags: long + +-- This test was split in two due to long runtimes in sanitizers. +-- The other part is 00284_external_aggregation. + +SET group_by_two_level_threshold_bytes = 50000000; +SET max_memory_usage = 0; +SET group_by_two_level_threshold = 100000; +SET max_bytes_before_external_group_by = '1Mi'; + +-- method: key_string & key_string_two_level +CREATE TABLE t_00284_str(s String) ENGINE = MergeTree() ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; +INSERT INTO t_00284_str SELECT toString(number) FROM numbers_mt(1e6); +INSERT INTO t_00284_str SELECT toString(number) FROM numbers_mt(1e6); +SELECT s, count() FROM t_00284_str GROUP BY s ORDER BY s LIMIT 10 OFFSET 42; + +-- method: low_cardinality_key_string & low_cardinality_key_string_two_level +CREATE TABLE t_00284_lc_str(s LowCardinality(String)) ENGINE = MergeTree() ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; +INSERT INTO t_00284_lc_str SELECT toString(number) FROM numbers_mt(1e6); +INSERT INTO t_00284_lc_str SELECT toString(number) FROM numbers_mt(1e6); +SELECT s, count() FROM t_00284_lc_str GROUP BY s ORDER BY s LIMIT 10 OFFSET 42; + diff --git a/tests/queries/0_stateless/00366_multi_statements.sh b/tests/queries/0_stateless/00366_multi_statements.sh index 0b2e80fe457..8546e547581 100755 --- a/tests/queries/0_stateless/00366_multi_statements.sh +++ b/tests/queries/0_stateless/00366_multi_statements.sh @@ -14,22 +14,22 @@ $CLICKHOUSE_CLIENT --query="SELECT 1; SELECT 2" $CLICKHOUSE_CLIENT --query="SELECT 1; SELECT 2;" $CLICKHOUSE_CLIENT --query="SELECT 1; SELECT 2; SELECT" 2>&1 | grep -o 'Syntax error' -$CLICKHOUSE_CLIENT -n --query="SELECT 1; S" 2>&1 | grep -o 'Syntax error' -$CLICKHOUSE_CLIENT -n --query="SELECT 1; SELECT 2" -$CLICKHOUSE_CLIENT -n --query="SELECT 1; SELECT 2;" -$CLICKHOUSE_CLIENT -n --query="SELECT 1; SELECT 2; SELECT" 2>&1 | grep -o 'Syntax error' +$CLICKHOUSE_CLIENT --query="SELECT 1; S" 2>&1 | grep -o 'Syntax error' +$CLICKHOUSE_CLIENT --query="SELECT 1; SELECT 2" +$CLICKHOUSE_CLIENT --query="SELECT 1; SELECT 2;" +$CLICKHOUSE_CLIENT --query="SELECT 1; SELECT 2; SELECT" 2>&1 | grep -o 'Syntax error' -$CLICKHOUSE_CLIENT -n --query="DROP TABLE IF EXISTS t_00366; CREATE TABLE t_00366 (x UInt64) ENGINE = TinyLog;" +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS t_00366; CREATE TABLE t_00366 (x UInt64) ENGINE = TinyLog;" $CLICKHOUSE_CLIENT --query="INSERT INTO t_00366 VALUES (1),(2),(3);" $CLICKHOUSE_CLIENT --query="SELECT * FROM t_00366" $CLICKHOUSE_CLIENT --query="INSERT INTO t_00366 VALUES" <<< "(4),(5),(6)" $CLICKHOUSE_CLIENT --query="SELECT * FROM t_00366" -$CLICKHOUSE_CLIENT -n --query="INSERT INTO t_00366 VALUES (1),(2),(3);" -$CLICKHOUSE_CLIENT -n --query="SELECT * FROM t_00366" -$CLICKHOUSE_CLIENT -n --query="INSERT INTO t_00366 VALUES" <<< "(4),(5),(6)" -$CLICKHOUSE_CLIENT -n --query="SELECT * FROM t_00366" +$CLICKHOUSE_CLIENT --query="INSERT INTO t_00366 VALUES (1),(2),(3);" +$CLICKHOUSE_CLIENT --query="SELECT * FROM t_00366" +$CLICKHOUSE_CLIENT --query="INSERT INTO t_00366 VALUES" <<< "(4),(5),(6)" +$CLICKHOUSE_CLIENT --query="SELECT * FROM t_00366" ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "SELECT 1" ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "SELECT 1;" @@ -48,4 +48,4 @@ $CLICKHOUSE_CLIENT --query="SELECT * FROM t_00366" ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=INSERT+INTO+t_00366+VALUES" -d "(7),(8),(9)" $CLICKHOUSE_CLIENT --query="SELECT * FROM t_00366" -$CLICKHOUSE_CLIENT -n --query="DROP TABLE t_00366;" +$CLICKHOUSE_CLIENT --query="DROP TABLE t_00366;" diff --git a/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql b/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql index 8a310cb8fc9..c8a243d9b27 100644 --- a/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql +++ b/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql @@ -1,4 +1,4 @@ --- Tags: shard +-- Tags: shard, long DROP TABLE IF EXISTS group_uniq_str; CREATE TABLE group_uniq_str ENGINE = Memory AS SELECT number % 10 as id, toString(intDiv((number%10000), 10)) as v FROM system.numbers LIMIT 10000000; @@ -7,7 +7,7 @@ INSERT INTO group_uniq_str SELECT 2 as id, toString(number % 100) as v FROM syst INSERT INTO group_uniq_str SELECT 5 as id, toString(number % 100) as v FROM system.numbers LIMIT 10000000; SELECT length(groupUniqArray(v)) FROM group_uniq_str GROUP BY id ORDER BY id; -SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_str') GROUP BY id ORDER BY id; +SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_str') GROUP BY id ORDER BY id SETTINGS max_rows_to_read = '100M'; SELECT length(groupUniqArray(10)(v)) FROM group_uniq_str GROUP BY id ORDER BY id; SELECT length(groupUniqArray(10000)(v)) FROM group_uniq_str GROUP BY id ORDER BY id; diff --git a/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql b/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql index abd0e6e6a45..4453c26283c 100644 --- a/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql +++ b/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql @@ -1,4 +1,6 @@ --- Tags: shard +-- Tags: long + +SET max_rows_to_read = '55M'; DROP TABLE IF EXISTS group_uniq_arr_int; CREATE TABLE group_uniq_arr_int ENGINE = Memory AS diff --git a/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql b/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql index e9cfff211f8..1ec91ac2396 100644 --- a/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql +++ b/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql @@ -1,4 +1,5 @@ --- Tags: shard +-- Tags: shard, long +SET max_rows_to_read = '55M'; DROP TABLE IF EXISTS group_uniq_arr_str; CREATE TABLE group_uniq_arr_str ENGINE = Memory AS diff --git a/tests/queries/0_stateless/00408_http_keep_alive.reference b/tests/queries/0_stateless/00408_http_keep_alive.reference index 17a7fd690a8..5402036bfd7 100644 --- a/tests/queries/0_stateless/00408_http_keep_alive.reference +++ b/tests/queries/0_stateless/00408_http_keep_alive.reference @@ -1,6 +1,6 @@ < Connection: Keep-Alive -< Keep-Alive: timeout=10 +< Keep-Alive: timeout=10, max=? < Connection: Keep-Alive -< Keep-Alive: timeout=10 +< Keep-Alive: timeout=10, max=? < Connection: Keep-Alive -< Keep-Alive: timeout=10 +< Keep-Alive: timeout=10, max=? diff --git a/tests/queries/0_stateless/00408_http_keep_alive.sh b/tests/queries/0_stateless/00408_http_keep_alive.sh index 4bd0e494eb8..4a1cb4ed712 100755 --- a/tests/queries/0_stateless/00408_http_keep_alive.sh +++ b/tests/queries/0_stateless/00408_http_keep_alive.sh @@ -6,9 +6,10 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) URL="${CLICKHOUSE_PORT_HTTP_PROTO}://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTP}/" -${CLICKHOUSE_CURL} -vsS "${URL}" --data-binary @- <<< "SELECT 1" 2>&1 | perl -lnE 'print if /Keep-Alive/'; -${CLICKHOUSE_CURL} -vsS "${URL}" --data-binary @- <<< " error here " 2>&1 | perl -lnE 'print if /Keep-Alive/'; -${CLICKHOUSE_CURL} -vsS "${URL}"ping 2>&1 | perl -lnE 'print if /Keep-Alive/'; +# the sed command here replaces the real number of left requests with a question mark, because it can vary and we don't really have control over it +${CLICKHOUSE_CURL} -vsS "${URL}" --data-binary @- <<< "SELECT 1" 2>&1 | sed -r 's/(keep-alive: timeout=10, max=)[0-9]+/\1?/I' | grep -i 'keep-alive'; +${CLICKHOUSE_CURL} -vsS "${URL}" --data-binary @- <<< " error here " 2>&1 | sed -r 's/(keep-alive: timeout=10, max=)[0-9]+/\1?/I' | grep -i 'keep-alive'; +${CLICKHOUSE_CURL} -vsS "${URL}"ping 2>&1 | perl -lnE 'print if /Keep-Alive/' | sed -r 's/(keep-alive: timeout=10, max=)[0-9]+/\1?/I' | grep -i 'keep-alive'; # no keep-alive: ${CLICKHOUSE_CURL} -vsS "${URL}"404/not/found/ 2>&1 | perl -lnE 'print if /Keep-Alive/'; diff --git a/tests/queries/0_stateless/00443_preferred_block_size_bytes.sh b/tests/queries/0_stateless/00443_preferred_block_size_bytes.sh index 27b9f5c00c7..0635fbc2a57 100755 --- a/tests/queries/0_stateless/00443_preferred_block_size_bytes.sh +++ b/tests/queries/0_stateless/00443_preferred_block_size_bytes.sh @@ -43,10 +43,10 @@ popd > /dev/null #SCRIPTDIR=`dirname "$SCRIPTPATH"` SCRIPTDIR=$SCRIPTPATH -cat "$SCRIPTDIR"/00282_merging.sql | $CLICKHOUSE_CLIENT --preferred_block_size_bytes=10 -n > "${CLICKHOUSE_TMP}"/preferred_block_size_bytes.stdout +cat "$SCRIPTDIR"/00282_merging.sql | $CLICKHOUSE_CLIENT --preferred_block_size_bytes=10 > "${CLICKHOUSE_TMP}"/preferred_block_size_bytes.stdout cmp "$SCRIPTDIR"/00282_merging.reference "${CLICKHOUSE_TMP}"/preferred_block_size_bytes.stdout && echo PASSED || echo FAILED -cat "$SCRIPTDIR"/00282_merging.sql | $CLICKHOUSE_CLIENT --preferred_block_size_bytes=20 -n > "${CLICKHOUSE_TMP}"/preferred_block_size_bytes.stdout +cat "$SCRIPTDIR"/00282_merging.sql | $CLICKHOUSE_CLIENT --preferred_block_size_bytes=20 > "${CLICKHOUSE_TMP}"/preferred_block_size_bytes.stdout cmp "$SCRIPTDIR"/00282_merging.reference "${CLICKHOUSE_TMP}"/preferred_block_size_bytes.stdout && echo PASSED || echo FAILED rm "${CLICKHOUSE_TMP}"/preferred_block_size_bytes.stdout diff --git a/tests/queries/0_stateless/00463_long_sessions_in_http_interface.sh b/tests/queries/0_stateless/00463_long_sessions_in_http_interface.sh index 6ee1649c9ed..86902fca4aa 100755 --- a/tests/queries/0_stateless/00463_long_sessions_in_http_interface.sh +++ b/tests/queries/0_stateless/00463_long_sessions_in_http_interface.sh @@ -74,7 +74,7 @@ ${CLICKHOUSE_CLIENT} --query "DROP TABLE t" echo "A session cannot be used by concurrent connections:" -${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_9&query_id=${CLICKHOUSE_DATABASE}_9" --data-binary "SELECT count() FROM system.numbers" >/dev/null & +${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_9&query_id=${CLICKHOUSE_DATABASE}_9&max_rows_to_read=0" --data-binary "SELECT count() FROM system.numbers" >/dev/null & # An infinite loop is required to make the test reliable. We will ensure that at least once the query on the line above has started before this check while true diff --git a/tests/queries/0_stateless/00501_http_head.reference b/tests/queries/0_stateless/00501_http_head.reference index 8351327b356..807bcd4922e 100644 --- a/tests/queries/0_stateless/00501_http_head.reference +++ b/tests/queries/0_stateless/00501_http_head.reference @@ -2,11 +2,11 @@ HTTP/1.1 200 OK Connection: Keep-Alive Content-Type: text/tab-separated-values; charset=UTF-8 Transfer-Encoding: chunked -Keep-Alive: timeout=10 +Keep-Alive: timeout=10, max=? HTTP/1.1 200 OK Connection: Keep-Alive Content-Type: text/tab-separated-values; charset=UTF-8 Transfer-Encoding: chunked -Keep-Alive: timeout=10 +Keep-Alive: timeout=10, max=? diff --git a/tests/queries/0_stateless/00501_http_head.sh b/tests/queries/0_stateless/00501_http_head.sh index 60283f26833..30da64c31f0 100755 --- a/tests/queries/0_stateless/00501_http_head.sh +++ b/tests/queries/0_stateless/00501_http_head.sh @@ -4,8 +4,9 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -( ${CLICKHOUSE_CURL} -s --head "${CLICKHOUSE_URL}&query=SELECT%201"; - ${CLICKHOUSE_CURL} -s --head "${CLICKHOUSE_URL}&query=select+*+from+system.numbers+limit+1000000" ) | grep -v "Date:" | grep -v "X-ClickHouse-Server-Display-Name:" | grep -v "X-ClickHouse-Query-Id:" | grep -v "X-ClickHouse-Format:" | grep -v "X-ClickHouse-Timezone:" +# the sed command here replaces the real number of left requests with a question mark, because it can vary and we don't really have control over it +( ${CLICKHOUSE_CURL} -s --head "${CLICKHOUSE_URL}&query=SELECT%201" | sed -r 's/(keep-alive: timeout=10, max=)[0-9]+/\1?/I'; + ${CLICKHOUSE_CURL} -s --head "${CLICKHOUSE_URL}&query=select+*+from+system.numbers+limit+1000000" ) | sed -r 's/(keep-alive: timeout=10, max=)[0-9]+/\1?/I' | grep -v "Date:" | grep -v "X-ClickHouse-Server-Display-Name:" | grep -v "X-ClickHouse-Query-Id:" | grep -v "X-ClickHouse-Format:" | grep -v "X-ClickHouse-Timezone:" if [[ $(${CLICKHOUSE_CURL} -sS -X POST -I "${CLICKHOUSE_URL}&query=SELECT+1" | grep -c '411 Length Required') -ne 1 ]]; then echo FAIL diff --git a/tests/queries/0_stateless/00543_access_to_temporary_table_in_readonly_mode.sh b/tests/queries/0_stateless/00543_access_to_temporary_table_in_readonly_mode.sh index 560b97a1d1b..5550fa69d3d 100755 --- a/tests/queries/0_stateless/00543_access_to_temporary_table_in_readonly_mode.sh +++ b/tests/queries/0_stateless/00543_access_to_temporary_table_in_readonly_mode.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" DROP TABLE IF EXISTS test_readonly; CREATE TABLE test_readonly ( ID Int @@ -16,7 +16,7 @@ $CLICKHOUSE_CLIENT -n --query=" ################ # Try to create temporary table -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" SET readonly = 1; CREATE TEMPORARY TABLE readonly ( ID Int @@ -26,7 +26,7 @@ CODE=$?; [ "$CODE" -ne "164" ] && [ "$CODE" -ne "0" ] && echo "Fail" && exit $CODE; # Try to insert into exists (non temporary) table -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" SET readonly = 1; INSERT INTO test_readonly (ID) VALUES (1); " 2> /dev/null; @@ -34,7 +34,7 @@ CODE=$?; [ "$CODE" -ne "164" ] && [ "$CODE" -ne "0" ] && echo "Fail" && exit $CODE; # Try to drop exists (non temporary) table -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" SET readonly = 1; DROP TABLE test_readonly; " 2> /dev/null; @@ -46,7 +46,7 @@ CODE=$?; ################ # Try to create temporary table -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" SET readonly = 2; CREATE TEMPORARY TABLE readonly ( ID Int @@ -58,7 +58,7 @@ CODE=$?; [ "$CODE" -ne "0" ] && echo "Fail" && exit $CODE; # Try to insert into exists (non temporary) table -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" SET readonly = 2; INSERT INTO test_readonly (ID) VALUES (1); " 2> /dev/null; @@ -66,7 +66,7 @@ CODE=$?; [ "$CODE" -ne "164" ] && [ "$CODE" -ne "0" ] && echo "Fail" && exit $CODE; # Try to drop exists (non temporary) table -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" SET readonly = 2; DROP TABLE test_readonly; " 2> /dev/null; @@ -78,7 +78,7 @@ CODE=$?; ################ # Try to create temporary table -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" SET readonly = 0; CREATE TEMPORARY TABLE readonly ( ID Int @@ -90,7 +90,7 @@ CODE=$?; [ "$CODE" -ne "0" ] && echo "Fail" && exit $CODE; # Try to insert into exists (non temporary) table -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" SET readonly = 0; INSERT INTO test_readonly (ID) VALUES (1); " 2> /dev/null; @@ -98,7 +98,7 @@ CODE=$?; [ "$CODE" -ne "0" ] && echo "Fail" && exit $CODE; # Try to drop exists (non temporary) table -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" SET readonly = 0; DROP TABLE test_readonly; " 2> /dev/null; diff --git a/tests/queries/0_stateless/00550_join_insert_select.sh b/tests/queries/0_stateless/00550_join_insert_select.sh index bfaccb613ca..ee2f3ab286b 100755 --- a/tests/queries/0_stateless/00550_join_insert_select.sh +++ b/tests/queries/0_stateless/00550_join_insert_select.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -n --ignore-error --query=" +$CLICKHOUSE_CLIENT --ignore-error --query=" DROP TABLE IF EXISTS test1_00550; DROP TABLE IF EXISTS test2_00550; DROP TABLE IF EXISTS test3_00550; diff --git a/tests/queries/0_stateless/00600_replace_running_query.sh b/tests/queries/0_stateless/00600_replace_running_query.sh index 7a71d17f19b..7d4125eea69 100755 --- a/tests/queries/0_stateless/00600_replace_running_query.sh +++ b/tests/queries/0_stateless/00600_replace_running_query.sh @@ -6,41 +6,56 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -TEST_PREFIX=$RANDOM +TEST_PREFIX="${CLICKHOUSE_DATABASE}" ${CLICKHOUSE_CLIENT} -q "drop user if exists u_00600${TEST_PREFIX}" -${CLICKHOUSE_CLIENT} -q "create user u_00600${TEST_PREFIX} settings max_execution_time=60, readonly=1" +${CLICKHOUSE_CLIENT} -q "create user u_00600${TEST_PREFIX} settings max_execution_time=60, readonly=1, max_rows_to_read=0" ${CLICKHOUSE_CLIENT} -q "grant select on system.numbers to u_00600${TEST_PREFIX}" function wait_for_query_to_start() { - while [[ $($CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "SELECT count() FROM system.processes WHERE query_id = '$1'") == 0 ]]; do sleep 0.1; done + while [[ 0 -eq $($CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "SELECT count() FROM system.processes WHERE query_id = '$1'") ]] + do + sleep 0.1 + done +} + +function wait_for_queries_to_finish() +{ + while [[ 0 -ne $($CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "SELECT count() FROM system.processes WHERE current_database = '${CLICKHOUSE_DATABASE}' AND query NOT LIKE '%this query%'") ]] + do + sleep 0.1 + done } -$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&query_id=hello&replace_running_query=1" -d 'SELECT 1, count() FROM system.numbers' > /dev/null 2>&1 & -wait_for_query_to_start 'hello' +$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&query_id=${CLICKHOUSE_DATABASE}hello&replace_running_query=1&max_rows_to_read=0" -d 'SELECT 1, count() FROM system.numbers' > /dev/null 2>&1 & +wait_for_query_to_start "${CLICKHOUSE_DATABASE}hello" # Replace it -$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&query_id=hello&replace_running_query=1" -d 'SELECT 0' +$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&query_id=${CLICKHOUSE_DATABASE}hello&replace_running_query=1" -d 'SELECT 0' # Wait for it to be replaced wait +wait_for_queries_to_finish -${CLICKHOUSE_CLIENT_BINARY} --user=u_00600${TEST_PREFIX} --query_id=42 --query='SELECT 2, count() FROM system.numbers' 2>&1 | grep -cF 'was cancelled' & -wait_for_query_to_start '42' +${CLICKHOUSE_CLIENT_BINARY} --user=u_00600${TEST_PREFIX} --query_id="${CLICKHOUSE_DATABASE}42" --query='SELECT 2, count() FROM system.numbers' 2>&1 | grep -cF 'QUERY_WAS_CANCELLED' & +wait_for_query_to_start "${CLICKHOUSE_DATABASE}42" # Trying to run another query with the same query_id -${CLICKHOUSE_CLIENT} --query_id=42 --query='SELECT 43' 2>&1 | grep -cF 'is already running by user' +${CLICKHOUSE_CLIENT} --query_id="${CLICKHOUSE_DATABASE}42" --query='SELECT 43' 2>&1 | grep -cF 'is already running by user' # Trying to replace query of a different user -$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&query_id=42&replace_running_query=1" -d 'SELECT 1' | grep -cF 'is already running by user' +$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&query_id=${CLICKHOUSE_DATABASE}42&replace_running_query=1" -d 'SELECT 1' | grep -cF 'is already running by user' -$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "KILL QUERY WHERE query_id = '42' SYNC" > /dev/null +$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "KILL QUERY WHERE query_id = '${CLICKHOUSE_DATABASE}42' SYNC" > /dev/null wait +wait_for_queries_to_finish -${CLICKHOUSE_CLIENT} --query_id=42 --query='SELECT 3, count() FROM system.numbers' 2>&1 | grep -cF 'was cancelled' & -wait_for_query_to_start '42' -${CLICKHOUSE_CLIENT} --query_id=42 --replace_running_query=1 --replace_running_query_max_wait_ms=500 --query='SELECT 43' 2>&1 | grep -F "can't be stopped" > /dev/null +${CLICKHOUSE_CLIENT} --query_id="${CLICKHOUSE_DATABASE}42" --max_rows_to_read=0 --query='SELECT 3, count() FROM system.numbers' 2>&1 | grep -cF 'QUERY_WAS_CANCELLED' & +wait_for_query_to_start "${CLICKHOUSE_DATABASE}42" +${CLICKHOUSE_CLIENT} --query_id="${CLICKHOUSE_DATABASE}42" --replace_running_query=1 --replace_running_query_max_wait_ms=500 --query='SELECT 43' 2>&1 | grep -F "can't be stopped" > /dev/null wait -${CLICKHOUSE_CLIENT} --query_id=42 --replace_running_query=1 --query='SELECT 44' +wait_for_queries_to_finish + +${CLICKHOUSE_CLIENT} --query_id="${CLICKHOUSE_DATABASE}42" --replace_running_query=1 --query='SELECT 44' ${CLICKHOUSE_CLIENT} -q "drop user u_00600${TEST_PREFIX}" diff --git a/tests/queries/0_stateless/00601_kill_running_query.reference b/tests/queries/0_stateless/00601_kill_running_query.reference index 3917ff89482..7824d5804bc 100644 --- a/tests/queries/0_stateless/00601_kill_running_query.reference +++ b/tests/queries/0_stateless/00601_kill_running_query.reference @@ -1 +1 @@ -waiting test_00601_default default SELECT sum(ignore(*)) FROM (SELECT number % 1000 AS k, groupArray(number) FROM numbers(50000000) GROUP BY k) +waiting test_00601_default default SELECT sum(ignore(*)) FROM (SELECT number % 1000 AS k, groupArray(number) FROM numbers(50000000) GROUP BY k) SETTINGS max_rows_to_read = 0 diff --git a/tests/queries/0_stateless/00601_kill_running_query.sh b/tests/queries/0_stateless/00601_kill_running_query.sh index 3163f8146d0..be0fff49129 100755 --- a/tests/queries/0_stateless/00601_kill_running_query.sh +++ b/tests/queries/0_stateless/00601_kill_running_query.sh @@ -11,7 +11,7 @@ function wait_for_query_to_start() while [[ $($CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "SELECT count() FROM system.processes WHERE query_id = '$1'") == 0 ]]; do sleep 0.1; done } -${CLICKHOUSE_CURL_COMMAND} -q --max-time 30 -sS "$CLICKHOUSE_URL&query_id=test_00601_$CLICKHOUSE_DATABASE" -d 'SELECT sum(ignore(*)) FROM (SELECT number % 1000 AS k, groupArray(number) FROM numbers(50000000) GROUP BY k)' > /dev/null & +${CLICKHOUSE_CURL_COMMAND} -q --max-time 30 -sS "$CLICKHOUSE_URL&query_id=test_00601_$CLICKHOUSE_DATABASE" -d 'SELECT sum(ignore(*)) FROM (SELECT number % 1000 AS k, groupArray(number) FROM numbers(50000000) GROUP BY k) SETTINGS max_rows_to_read = 0' > /dev/null & wait_for_query_to_start "test_00601_$CLICKHOUSE_DATABASE" $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "KILL QUERY WHERE query_id = 'test_00601_$CLICKHOUSE_DATABASE'" wait diff --git a/tests/queries/0_stateless/00632_get_sample_block_cache.sql b/tests/queries/0_stateless/00632_get_sample_block_cache.sql index ae9b6bb7b2c..a631cbb8b86 100644 --- a/tests/queries/0_stateless/00632_get_sample_block_cache.sql +++ b/tests/queries/0_stateless/00632_get_sample_block_cache.sql @@ -2,6 +2,9 @@ SET joined_subquery_requires_alias = 0; +-- We are no longer interested in the old analyzer. +SET allow_experimental_analyzer = 1; + -- This test (SELECT) without cache can take tens minutes DROP TABLE IF EXISTS dict_string; DROP TABLE IF EXISTS dict_ui64; @@ -41,8 +44,6 @@ SETTINGS index_granularity = 8192; CREATE TABLE dict_string (entityIri String) ENGINE = Memory; CREATE TABLE dict_ui64 (learnerId UInt64) ENGINE = Memory; ---SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count`, `overall-full-watched-learners-count`, `views-count`, `time-before-full-watched-average`, if (isNaN((`overall-full-watched-learners-count`/`overall-watchers-count`) * 100), 0, (`overall-full-watched-learners-count`/`overall-watchers-count`) * 100) as `overall-watched-part`, if (isNaN((`full-watched-learners-count`/`watchers-count` * 100)), 0, (`full-watched-learners-count`/`watchers-count` * 100)) as `full-watched-part`, if (isNaN((`rejects-count`/`views-count` * 100)), 0, (`rejects-count`/`views-count` * 100)) as `rejects-part` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count`, `overall-full-watched-learners-count`, `views-count`, `time-before-full-watched-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count`, `overall-full-watched-learners-count`, `views-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count`, `overall-full-watched-learners-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average` FROM (SELECT `entityIri`, `watchers-count` FROM (SELECT `entityIri` FROM `CloM8CwMR2`) ANY LEFT JOIN (SELECT uniq(learnerId) as `watchers-count`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(viewDurationSum) as `time-repeating-average`, `entityIri` FROM (SELECT sum(views.viewDuration) as viewDurationSum, `entityIri`, `learnerId` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `views`.`repeatingView` = 1 AND `learnerId` IN `tkRpHxGqM1` GROUP BY `learnerId`, `entityIri`) GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(views.viewDuration) as `reject-views-duration-average`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `views`.`reject` = 1 AND `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(viewsCount) as `repeating-views-count-average`, `entityIri` FROM (SELECT count() as viewsCount, `learnerId`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `courseId` = 1 AND `entityIri` IN `CloM8CwMR2` WHERE `views`.`repeatingView` = 1 AND `learnerId` IN `tkRpHxGqM1` GROUP BY `learnerId`, `entityIri`) GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(views.viewDuration) as `views-duration-average`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(views.watchedPart) as `watched-part-average`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT count() as `rejects-count`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `views`.`reject` = 1 AND `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(progressMax) as `progress-average`, `entityIri` FROM (SELECT max(views.progress) as progressMax, `entityIri`, `learnerId` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `learnerId`, `entityIri`) GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(fullWatchedViews) as `views-count-before-full-watched-average`, `entityIri` FROM `video_views` FINAL PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT any(duration) as `duration`, `entityIri` FROM `video_views` FINAL PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT uniq(learnerId) as `full-watched-learners-count`, `entityIri` FROM `video_views` FINAL PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `fullWatched` = 1 AND `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT uniq(learnerId) as `overall-watchers-count`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT uniq(learnerId) as `overall-full-watched-learners-count`, `entityIri` FROM `video_views` FINAL PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `fullWatched` = 1 AND `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT count() as `views-count`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(fullWatchedTime) as `time-before-full-watched-average`, `entityIri` FROM `video_views` FINAL PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) FORMAT JSON; - SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count`, `overall-full-watched-learners-count`, `views-count`, `time-before-full-watched-average`, if (isNaN((`overall-full-watched-learners-count`/`overall-watchers-count`) * 100), 0, (`overall-full-watched-learners-count`/`overall-watchers-count`) * 100) as `overall-watched-part`, if (isNaN((`full-watched-learners-count`/`watchers-count` * 100)), 0, (`full-watched-learners-count`/`watchers-count` * 100)) as `full-watched-part`, if (isNaN((`rejects-count`/`views-count` * 100)), 0, (`rejects-count`/`views-count` * 100)) as `rejects-part` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count`, `overall-full-watched-learners-count`, `views-count`, `time-before-full-watched-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count`, `overall-full-watched-learners-count`, `views-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count`, `overall-full-watched-learners-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average` FROM (SELECT `entityIri`, `watchers-count` FROM (SELECT `entityIri` FROM dict_string) ANY LEFT JOIN (SELECT uniq(learnerId) as `watchers-count`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(viewDurationSum) as `time-repeating-average`, `entityIri` FROM (SELECT sum(views.viewDuration) as viewDurationSum, `entityIri`, `learnerId` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `views`.`repeatingView` = 1 AND `learnerId` IN dict_ui64 GROUP BY `learnerId`, `entityIri`) GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(views.viewDuration) as `reject-views-duration-average`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `views`.`reject` = 1 AND `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(viewsCount) as `repeating-views-count-average`, `entityIri` FROM (SELECT count() as viewsCount, `learnerId`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `courseId` = 1 AND `entityIri` IN dict_string WHERE `views`.`repeatingView` = 1 AND `learnerId` IN dict_ui64 GROUP BY `learnerId`, `entityIri`) GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(views.viewDuration) as `views-duration-average`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(views.watchedPart) as `watched-part-average`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT count() as `rejects-count`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `views`.`reject` = 1 AND `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(progressMax) as `progress-average`, `entityIri` FROM (SELECT max(views.progress) as progressMax, `entityIri`, `learnerId` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `learnerId` IN dict_ui64 GROUP BY `learnerId`, `entityIri`) GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(fullWatchedViews) as `views-count-before-full-watched-average`, `entityIri` FROM `video_views` FINAL PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT any(duration) as `duration`, `entityIri` FROM `video_views` FINAL PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT uniq(learnerId) as `full-watched-learners-count`, `entityIri` FROM `video_views` FINAL PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `fullWatched` = 1 AND `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT uniq(learnerId) as `overall-watchers-count`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT uniq(learnerId) as `overall-full-watched-learners-count`, `entityIri` FROM `video_views` FINAL PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `fullWatched` = 1 AND `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT count() as `views-count`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(fullWatchedTime) as `time-before-full-watched-average`, `entityIri` FROM `video_views` FINAL PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`); @@ -55,8 +56,8 @@ DROP TABLE video_views; --- Test for tsan: Ensure cache used from one thread -SET max_threads = 32; +-- Test for tsan: Ensure cache is used from one thread +SET max_threads = 32, max_memory_usage = '10G'; DROP TABLE IF EXISTS sample_00632; @@ -173,7 +174,6 @@ FROM UNION ALL SELECT * FROM ( SELECT * FROM sample_00632 WHERE x > 0 ) ) GROUP BY x - --HAVING c = 1 ORDER BY x ASC ); DROP TABLE sample_00632; diff --git a/tests/queries/0_stateless/00634_performance_introspection_and_logging.sh b/tests/queries/0_stateless/00634_performance_introspection_and_logging.sh index 93fd0c4a977..e9a4369a5bf 100755 --- a/tests/queries/0_stateless/00634_performance_introspection_and_logging.sh +++ b/tests/queries/0_stateless/00634_performance_introspection_and_logging.sh @@ -19,13 +19,13 @@ settings="$server_logs --log_queries=1 --log_query_threads=1 --log_profile_event # Test insert logging on each block and checkPacket() method -$CLICKHOUSE_CLIENT $settings -n -q " +$CLICKHOUSE_CLIENT $settings -q " DROP TABLE IF EXISTS null_00634; CREATE TABLE null_00634 (i UInt8) ENGINE = MergeTree PARTITION BY tuple() ORDER BY tuple();" head -c 1000 /dev/zero | $CLICKHOUSE_CLIENT $settings --max_insert_block_size=10 --min_insert_block_size_rows=1 --min_insert_block_size_bytes=1 -q "INSERT INTO null_00634 FORMAT RowBinary" -$CLICKHOUSE_CLIENT $settings -n -q " +$CLICKHOUSE_CLIENT $settings -q " SELECT count() FROM null_00634; DROP TABLE null_00634;" diff --git a/tests/queries/0_stateless/00652_mergetree_mutations.sh b/tests/queries/0_stateless/00652_mergetree_mutations.sh index a9d7908a1af..edb306d3883 100755 --- a/tests/queries/0_stateless/00652_mergetree_mutations.sh +++ b/tests/queries/0_stateless/00652_mergetree_mutations.sh @@ -70,6 +70,21 @@ sleep 1 ${CLICKHOUSE_CLIENT} --query="INSERT INTO mutations_cleaner(x) VALUES (4)" sleep 0.1 +for i in {1..10} +do + + if [ "$(${CLICKHOUSE_CLIENT} --query="SELECT count() FROM system.mutations WHERE database = '$CLICKHOUSE_DATABASE' and table = 'mutations_cleaner'")" -eq 2 ]; then + break + fi + + if [[ $i -eq 100 ]]; then + echo "Timed out while waiting for outdated mutation record to be deleted!" + fi + + sleep 1 + ${CLICKHOUSE_CLIENT} --query="INSERT INTO mutations_cleaner(x) VALUES (4)" +done + # Check that the first mutation is cleaned ${CLICKHOUSE_CLIENT} --query="SELECT mutation_id, command, is_done FROM system.mutations WHERE database = '$CLICKHOUSE_DATABASE' and table = 'mutations_cleaner' ORDER BY mutation_id" diff --git a/tests/queries/0_stateless/00715_fetch_merged_or_mutated_part_zookeeper.sh b/tests/queries/0_stateless/00715_fetch_merged_or_mutated_part_zookeeper.sh index 96d5764780f..d69e14bdbb9 100755 --- a/tests/queries/0_stateless/00715_fetch_merged_or_mutated_part_zookeeper.sh +++ b/tests/queries/0_stateless/00715_fetch_merged_or_mutated_part_zookeeper.sh @@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/mergetree_mutations.lib -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" DROP TABLE IF EXISTS fetches_r1 SYNC; DROP TABLE IF EXISTS fetches_r2 SYNC" @@ -17,7 +17,7 @@ ${CLICKHOUSE_CLIENT} --query="CREATE TABLE fetches_r2(x UInt32) ENGINE Replicate SETTINGS prefer_fetch_merged_part_time_threshold=0, \ prefer_fetch_merged_part_size_threshold=0" -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET insert_keeper_fault_injection_probability=0; INSERT INTO fetches_r1 VALUES (1); INSERT INTO fetches_r1 VALUES (2); @@ -51,6 +51,6 @@ ${CLICKHOUSE_CLIENT} --query="SYSTEM SYNC REPLICA fetches_r2" ${CLICKHOUSE_CLIENT} --query="SELECT '*** Check data after fetch/clone of mutated part ***'" ${CLICKHOUSE_CLIENT} --query="SELECT _part, * FROM fetches_r2 ORDER BY x" -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" DROP TABLE fetches_r1 SYNC; DROP TABLE fetches_r2 SYNC" diff --git a/tests/queries/0_stateless/00727_concat.reference b/tests/queries/0_stateless/00727_concat.reference index 329ad36ad3c..a93bf12b77a 100644 --- a/tests/queries/0_stateless/00727_concat.reference +++ b/tests/queries/0_stateless/00727_concat.reference @@ -34,6 +34,7 @@ With 2023-11-14 05:50:12.123 With hallo With [\'foo\',\'bar\'] With {"foo":"bar"} +With {"foo":"bar"} With (42,\'foo\') With {42:\'foo\'} With 122.233.64.201 diff --git a/tests/queries/0_stateless/00727_concat.sql b/tests/queries/0_stateless/00727_concat.sql index 76dae541261..65cd019cc13 100644 --- a/tests/queries/0_stateless/00727_concat.sql +++ b/tests/queries/0_stateless/00727_concat.sql @@ -2,6 +2,7 @@ -- no-fasttest: json type needs rapidjson library, geo types need s2 geometry SET allow_experimental_object_type = 1; +SET allow_experimental_json_type = 1; SET allow_suspicious_low_cardinality_types=1; SELECT '-- Const string + non-const arbitrary type'; @@ -40,6 +41,7 @@ SELECT concat('With ', materialize('2023-11-14 05:50:12.123' :: DateTime64(3, 'E SELECT concat('With ', materialize('hallo' :: Enum('hallo' = 1))); SELECT concat('With ', materialize(['foo', 'bar'] :: Array(String))); SELECT concat('With ', materialize('{"foo": "bar"}' :: JSON)); +SELECT concat('With ', materialize('{"foo": "bar"}' :: Object('json'))); SELECT concat('With ', materialize((42, 'foo') :: Tuple(Int32, String))); SELECT concat('With ', materialize(map(42, 'foo') :: Map(Int32, String))); SELECT concat('With ', materialize('122.233.64.201' :: IPv4)); diff --git a/tests/queries/0_stateless/00754_distributed_optimize_skip_select_on_unused_shards.sh b/tests/queries/0_stateless/00754_distributed_optimize_skip_select_on_unused_shards.sh index 09f20284402..989096a26d6 100755 --- a/tests/queries/0_stateless/00754_distributed_optimize_skip_select_on_unused_shards.sh +++ b/tests/queries/0_stateless/00754_distributed_optimize_skip_select_on_unused_shards.sh @@ -25,83 +25,83 @@ ${CLICKHOUSE_CLIENT} --query "SELECT count(*) FROM distributed WHERE a = 0 AND b | grep -F -q "All connection tries failed" && echo 'OK' || echo 'FAIL' # Should pass now -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed WHERE a = 0 AND b = 0; " # Should still fail because of matching unavailable shard -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed WHERE a = 2 AND b = 2; " 2>&1 \ | grep -F -q "All connection tries failed" && echo 'OK' || echo 'FAIL' # Try more complext expressions for constant folding - all should pass. -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed WHERE a = 1 AND a = 0 AND b = 0; " -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed WHERE a IN (0, 1) AND b IN (0, 1); " -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed WHERE a = 0 AND b = 0 OR a = 1 AND b = 1; " # TODO: should pass one day. -#${CLICKHOUSE_CLIENT} -n --query=" +#${CLICKHOUSE_CLIENT} --query=" # SET optimize_skip_unused_shards = 1; # SELECT count(*) FROM distributed WHERE a = 0 AND b >= 0 AND b <= 1; #" -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed WHERE a = 0 AND b = 0 AND c = 0; " -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed WHERE a = 0 AND b = 0 AND c != 10; " -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed WHERE a = 0 AND b = 0 AND (a+b)*b != 12; " -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed WHERE (a = 0 OR a = 1) AND (b = 0 OR b = 1); " # These ones should fail. -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed WHERE a = 0 AND b <= 1; " 2>&1 \ | grep -F -q "All connection tries failed" && echo 'OK' || echo 'FAIL' -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed WHERE a = 0 AND c = 0; " 2>&1 \ | grep -F -q "All connection tries failed" && echo 'OK' || echo 'FAIL' -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed WHERE a = 0 OR a = 1 AND b = 0; " 2>&1 \ | grep -F -q "All connection tries failed" && echo 'OK' || echo 'FAIL' -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed WHERE a = 0 AND b = 0 OR a = 2 AND b = 2; " 2>&1 \ | grep -F -q "All connection tries failed" && echo 'OK' || echo 'FAIL' -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed WHERE a = 0 AND b = 0 OR c = 0; " 2>&1 \ | grep -F -q "All connection tries failed" && echo 'OK' || echo 'FAIL' diff --git a/tests/queries/0_stateless/00754_distributed_optimize_skip_select_on_unused_shards_with_prewhere.sh b/tests/queries/0_stateless/00754_distributed_optimize_skip_select_on_unused_shards_with_prewhere.sh index 035907bddd7..b3dff2ea69a 100755 --- a/tests/queries/0_stateless/00754_distributed_optimize_skip_select_on_unused_shards_with_prewhere.sh +++ b/tests/queries/0_stateless/00754_distributed_optimize_skip_select_on_unused_shards_with_prewhere.sh @@ -30,73 +30,73 @@ ${CLICKHOUSE_CLIENT} --query "SELECT count(*) FROM distributed_00754 PREWHERE a | grep -F -q "All connection tries failed" && echo 'OK' || echo 'FAIL' # Should pass now -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed_00754 PREWHERE a = 0 AND b = 0; " # Should still fail because of matching unavailable shard -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed_00754 PREWHERE a = 2 AND b = 2; " 2>&1 \ | grep -F -q "All connection tries failed" && echo 'OK' || echo 'FAIL' # Try more complex expressions for constant folding - all should pass. -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed_00754 PREWHERE a = 1 AND a = 0 WHERE b = 0; " -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed_00754 PREWHERE a = 1 WHERE b = 1 AND length(c) = 5; " -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed_00754 PREWHERE a IN (0, 1) AND b IN (0, 1) WHERE c LIKE '%l%'; " -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed_00754 PREWHERE a IN (0, 1) WHERE b IN (0, 1) AND c LIKE '%l%'; " -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed_00754 PREWHERE a = 0 AND b = 0 OR a = 1 AND b = 1 WHERE c LIKE '%l%'; " -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed_00754 PREWHERE (a = 0 OR a = 1) WHERE (b = 0 OR b = 1); " # These should fail. -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed_00754 PREWHERE a = 0 AND b <= 1; " 2>&1 \ | grep -F -q "All connection tries failed" && echo 'OK' || echo 'FAIL' -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed_00754 PREWHERE a = 0 WHERE c LIKE '%l%'; " 2>&1 \ | grep -F -q "All connection tries failed" && echo 'OK' || echo 'FAIL' -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed_00754 PREWHERE a = 0 OR a = 1 AND b = 0; " 2>&1 \ | grep -F -q "All connection tries failed" && echo 'OK' || echo 'FAIL' -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed_00754 PREWHERE a = 0 AND b = 0 OR a = 2 AND b = 2; " 2>&1 \ | grep -F -q "All connection tries failed" && echo 'OK' || echo 'FAIL' -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed_00754 PREWHERE a = 0 AND b = 0 OR c LIKE '%l%'; " 2>&1 \ | grep -F -q "All connection tries failed" && echo 'OK' || echo 'FAIL' diff --git a/tests/queries/0_stateless/00761_lower_utf8_bug.sql b/tests/queries/0_stateless/00761_lower_utf8_bug.sql index de20b894331..a0ab55edc15 100644 --- a/tests/queries/0_stateless/00761_lower_utf8_bug.sql +++ b/tests/queries/0_stateless/00761_lower_utf8_bug.sql @@ -1 +1,4 @@ +-- Tags: no-fasttest +-- no-fasttest: upper/lowerUTF8 use ICU + SELECT lowerUTF8('\xF0') = lowerUTF8('\xF0'); diff --git a/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.reference b/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.reference index a2178f5eda7..a6e03404f2b 100644 --- a/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.reference +++ b/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.reference @@ -1,4 +1,4 @@ -CREATE TABLE default.compression_codec\n(\n `id` UInt64 CODEC(DEFLATE_QPL),\n `data` String CODEC(DEFLATE_QPL),\n `ddd` Date CODEC(DEFLATE_QPL),\n `ddd32` Date32 CODEC(DEFLATE_QPL),\n `somenum` Float64 CODEC(DEFLATE_QPL),\n `somestr` FixedString(3) CODEC(DEFLATE_QPL),\n `othernum` Int64 CODEC(DEFLATE_QPL),\n `somearray` Array(UInt8) CODEC(DEFLATE_QPL),\n `somemap` Map(String, UInt32) CODEC(DEFLATE_QPL),\n `sometuple` Tuple(UInt16, UInt64) CODEC(DEFLATE_QPL)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.compression_codec\n(\n `id` UInt64 CODEC(DEFLATE_QPL),\n `data` String CODEC(DEFLATE_QPL),\n `ddd` Date CODEC(DEFLATE_QPL),\n `ddd32` Date32 CODEC(DEFLATE_QPL),\n `somenum` Float64 CODEC(DEFLATE_QPL),\n `somestr` FixedString(3) CODEC(DEFLATE_QPL),\n `othernum` Int64 CODEC(DEFLATE_QPL),\n `somearray` Array(UInt8) CODEC(DEFLATE_QPL),\n `somemap` Map(String, UInt32) CODEC(DEFLATE_QPL),\n `sometuple` Tuple(\n UInt16,\n UInt64) CODEC(DEFLATE_QPL)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 1 hello 2018-12-14 2018-12-14 1.1 aaa 5 [1,2,3] {'k1':1,'k2':2} (1,2) 2 world 2018-12-15 2018-12-15 2.2 bbb 6 [4,5,6] {'k3':3,'k4':4} (3,4) 3 ! 2018-12-16 2018-12-16 3.3 ccc 7 [7,8,9] {'k5':5,'k6':6} (5,6) diff --git a/tests/queries/0_stateless/00804_test_zstd_qat_codec_compression.reference b/tests/queries/0_stateless/00804_test_zstd_qat_codec_compression.reference index 31a4360469f..ff70403ce7a 100644 --- a/tests/queries/0_stateless/00804_test_zstd_qat_codec_compression.reference +++ b/tests/queries/0_stateless/00804_test_zstd_qat_codec_compression.reference @@ -1,4 +1,4 @@ -CREATE TABLE default.compression_codec\n(\n `id` UInt64 CODEC(ZSTD_QAT(1)),\n `data` String CODEC(ZSTD_QAT(1)),\n `ddd` Date CODEC(ZSTD_QAT(1)),\n `ddd32` Date32 CODEC(ZSTD_QAT(1)),\n `somenum` Float64 CODEC(ZSTD_QAT(1)),\n `somestr` FixedString(3) CODEC(ZSTD_QAT(1)),\n `othernum` Int64 CODEC(ZSTD_QAT(1)),\n `somearray` Array(UInt8) CODEC(ZSTD_QAT(1)),\n `somemap` Map(String, UInt32) CODEC(ZSTD_QAT(1)),\n `sometuple` Tuple(UInt16, UInt64) CODEC(ZSTD_QAT(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.compression_codec\n(\n `id` UInt64 CODEC(ZSTD_QAT(1)),\n `data` String CODEC(ZSTD_QAT(1)),\n `ddd` Date CODEC(ZSTD_QAT(1)),\n `ddd32` Date32 CODEC(ZSTD_QAT(1)),\n `somenum` Float64 CODEC(ZSTD_QAT(1)),\n `somestr` FixedString(3) CODEC(ZSTD_QAT(1)),\n `othernum` Int64 CODEC(ZSTD_QAT(1)),\n `somearray` Array(UInt8) CODEC(ZSTD_QAT(1)),\n `somemap` Map(String, UInt32) CODEC(ZSTD_QAT(1)),\n `sometuple` Tuple(\n UInt16,\n UInt64) CODEC(ZSTD_QAT(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 1 hello 2018-12-14 2018-12-14 1.1 aaa 5 [1,2,3] {'k1':1,'k2':2} (1,2) 2 world 2018-12-15 2018-12-15 2.2 bbb 6 [4,5,6] {'k3':3,'k4':4} (3,4) 3 ! 2018-12-16 2018-12-16 3.3 ccc 7 [7,8,9] {'k5':5,'k6':6} (5,6) diff --git a/tests/queries/0_stateless/00814_replicated_minimalistic_part_header_zookeeper.sh b/tests/queries/0_stateless/00814_replicated_minimalistic_part_header_zookeeper.sh index 12d889a7137..8f7a1a9ae98 100755 --- a/tests/queries/0_stateless/00814_replicated_minimalistic_part_header_zookeeper.sh +++ b/tests/queries/0_stateless/00814_replicated_minimalistic_part_header_zookeeper.sh @@ -10,7 +10,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) SHARD=$($CLICKHOUSE_CLIENT --query "Select getMacro('shard')") REPLICA=$($CLICKHOUSE_CLIENT --query "Select getMacro('replica')") -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " DROP TABLE IF EXISTS part_header_r1; DROP TABLE IF EXISTS part_header_r2; @@ -62,7 +62,7 @@ do [[ $count1 == 1 && $count2 == 1 ]] && break done -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " SELECT '*** Test part removal ***'; SELECT '*** replica 1 ***'; diff --git a/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.sh b/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.sh index 5c21c70e06a..dd3735f27b1 100755 --- a/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.sh +++ b/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CURL} --max-time 1 -sS "${CLICKHOUSE_URL}&query_id=cancel_http_readonly_queries_on_client_close&cancel_http_readonly_queries_on_client_close=1&query=SELECT+count()+FROM+system.numbers" 2>&1 | grep -cF 'curl: (28)' +${CLICKHOUSE_CURL} --max-time 1 -sS "${CLICKHOUSE_URL}&query_id=cancel_http_readonly_queries_on_client_close&cancel_http_readonly_queries_on_client_close=1&max_rows_to_read=0&query=SELECT+count()+FROM+system.numbers" 2>&1 | grep -cF 'curl: (28)' i=0 retries=300 while [[ $i -lt $retries ]]; do diff --git a/tests/queries/0_stateless/00837_minmax_index.sh b/tests/queries/0_stateless/00837_minmax_index.sh index e4de0b9ebfc..ff487f50ee0 100755 --- a/tests/queries/0_stateless/00837_minmax_index.sh +++ b/tests/queries/0_stateless/00837_minmax_index.sh @@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS minmax_idx;" -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" CREATE TABLE minmax_idx ( u64 UInt64, diff --git a/tests/queries/0_stateless/00838_unique_index.sh b/tests/queries/0_stateless/00838_unique_index.sh index b267b6a8eb3..a3aba4f26b6 100755 --- a/tests/queries/0_stateless/00838_unique_index.sh +++ b/tests/queries/0_stateless/00838_unique_index.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS set_idx;" -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" CREATE TABLE set_idx ( u64 UInt64, diff --git a/tests/queries/0_stateless/00900_long_parquet_load.sh b/tests/queries/0_stateless/00900_long_parquet_load.sh index 1bafb033f56..3a7022ac0cf 100755 --- a/tests/queries/0_stateless/00900_long_parquet_load.sh +++ b/tests/queries/0_stateless/00900_long_parquet_load.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-fasttest, no-debug +# Tags: long, no-fasttest, no-debug, no-asan, no-msan, no-tsan # # Load all possible .parquet files found in submodules. diff --git a/tests/queries/0_stateless/00906_low_cardinality_cache.sql b/tests/queries/0_stateless/00906_low_cardinality_cache.sql index 55eacd0db44..337fba865fd 100644 --- a/tests/queries/0_stateless/00906_low_cardinality_cache.sql +++ b/tests/queries/0_stateless/00906_low_cardinality_cache.sql @@ -1,3 +1,6 @@ +-- Tags: long + +SET max_rows_to_read = '100M'; drop table if exists lc_00906; create table lc_00906 (b LowCardinality(String)) engine=MergeTree order by b SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into lc_00906 select '0123456789' from numbers(100000000); diff --git a/tests/queries/0_stateless/00907_set_index_max_rows.sh b/tests/queries/0_stateless/00907_set_index_max_rows.sh index 3707aaf2ca6..bdd0f36346f 100755 --- a/tests/queries/0_stateless/00907_set_index_max_rows.sh +++ b/tests/queries/0_stateless/00907_set_index_max_rows.sh @@ -6,7 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS set_idx;" -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" CREATE TABLE set_idx ( u64 UInt64, diff --git a/tests/queries/0_stateless/00908_bloom_filter_index.sh b/tests/queries/0_stateless/00908_bloom_filter_index.sh index 25a6567b894..3bd169dd6df 100755 --- a/tests/queries/0_stateless/00908_bloom_filter_index.sh +++ b/tests/queries/0_stateless/00908_bloom_filter_index.sh @@ -12,7 +12,7 @@ $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS bloom_filter_idx3;" # NGRAM BF -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" CREATE TABLE bloom_filter_idx ( k UInt64, @@ -22,7 +22,7 @@ CREATE TABLE bloom_filter_idx ORDER BY k SETTINGS index_granularity = 2, index_granularity_bytes = '10Mi';" -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" CREATE TABLE bloom_filter_idx2 ( k UInt64, @@ -109,7 +109,7 @@ $CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT count() FROM bloom # TOKEN BF -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" CREATE TABLE bloom_filter_idx3 ( k UInt64, @@ -147,7 +147,7 @@ $CLICKHOUSE_CLIENT --query="DROP TABLE bloom_filter_idx2" $CLICKHOUSE_CLIENT --query="DROP TABLE bloom_filter_idx3" $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS bloom_filter_idx_na;" -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" CREATE TABLE bloom_filter_idx_na ( na Array(Array(String)), @@ -156,7 +156,7 @@ CREATE TABLE bloom_filter_idx_na ORDER BY na" 2>&1 | grep -c 'DB::Exception: Unexpected type Array(Array(String)) of bloom filter index' # NGRAM BF with IPv6 -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" CREATE TABLE bloom_filter_ipv6_idx ( foo IPv6, diff --git a/tests/queries/0_stateless/00942_mutate_index.sh b/tests/queries/0_stateless/00942_mutate_index.sh index 6ebb30c25b9..e1e23639e85 100755 --- a/tests/queries/0_stateless/00942_mutate_index.sh +++ b/tests/queries/0_stateless/00942_mutate_index.sh @@ -6,7 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS minmax_idx;" -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" CREATE TABLE minmax_idx ( u64 UInt64, diff --git a/tests/queries/0_stateless/00943_materialize_index.sh b/tests/queries/0_stateless/00943_materialize_index.sh index 6ff7d34a9d7..e4a585fce97 100755 --- a/tests/queries/0_stateless/00943_materialize_index.sh +++ b/tests/queries/0_stateless/00943_materialize_index.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS minmax_idx;" -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" CREATE TABLE minmax_idx ( u64 UInt64, @@ -34,7 +34,7 @@ $CLICKHOUSE_CLIENT --query="INSERT INTO minmax_idx VALUES $CLICKHOUSE_CLIENT --query="SELECT count() FROM minmax_idx WHERE i64 = 2 SETTINGS merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM minmax_idx WHERE i64 = 2 SETTINGS merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0 FORMAT JSON" | grep "rows_read" -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" ALTER TABLE minmax_idx ADD INDEX idx (i64, u64 * i64) TYPE minmax GRANULARITY 1 SETTINGS mutations_sync = 2;" $CLICKHOUSE_CLIENT --query="ALTER TABLE minmax_idx MATERIALIZE INDEX idx IN PARTITION 1 SETTINGS mutations_sync = 2;" diff --git a/tests/queries/0_stateless/00944_clear_index_in_partition.sh b/tests/queries/0_stateless/00944_clear_index_in_partition.sh index 4655077960f..a12536da239 100755 --- a/tests/queries/0_stateless/00944_clear_index_in_partition.sh +++ b/tests/queries/0_stateless/00944_clear_index_in_partition.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS minmax_idx;" -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" CREATE TABLE minmax_idx ( u64 UInt64, diff --git a/tests/queries/0_stateless/00945_bloom_filter_index.reference b/tests/queries/0_stateless/00945_bloom_filter_index.reference index e6751fe4762..9d9b49b29c9 100644 --- a/tests/queries/0_stateless/00945_bloom_filter_index.reference +++ b/tests/queries/0_stateless/00945_bloom_filter_index.reference @@ -227,3 +227,5 @@ 1 value1 1 value2 2 value3 +1 +1 diff --git a/tests/queries/0_stateless/00945_bloom_filter_index.sql b/tests/queries/0_stateless/00945_bloom_filter_index.sql index 2b7feacbd98..6e3819e74d3 100644 --- a/tests/queries/0_stateless/00945_bloom_filter_index.sql +++ b/tests/queries/0_stateless/00945_bloom_filter_index.sql @@ -374,3 +374,10 @@ SELECT id, ary[indexOf(ary, 'value2')] FROM test_bf_indexOf WHERE ary[indexOf(ar SELECT id, ary[indexOf(ary, 'value3')] FROM test_bf_indexOf WHERE ary[indexOf(ary, 'value3')] = 'value3' ORDER BY id FORMAT TSV; DROP TABLE IF EXISTS test_bf_indexOf; + +-- Test for bug #65597 +DROP TABLE IF EXISTS test_bf_cast; +CREATE TABLE test_bf_cast (c Int32, INDEX x1 (c) type bloom_filter) ENGINE = MergeTree ORDER BY c AS SELECT 1; +SELECT count() FROM test_bf_cast WHERE cast(c = 1 OR c = 9999 AS Bool) SETTINGS use_skip_indexes=0; +SELECT count() FROM test_bf_cast WHERE cast(c = 1 OR c = 9999 AS Bool) SETTINGS use_skip_indexes=1; +DROP TABLE test_bf_cast; diff --git a/tests/queries/0_stateless/00961_check_table.reference b/tests/queries/0_stateless/00961_check_table.reference index a0a054898b9..686285bb6aa 100644 --- a/tests/queries/0_stateless/00961_check_table.reference +++ b/tests/queries/0_stateless/00961_check_table.reference @@ -14,4 +14,4 @@ ======== 201902_4_5_1 1 ======== -201801_1_1_0 1 +201801_1_1_2 1 diff --git a/tests/queries/0_stateless/00961_check_table.sql b/tests/queries/0_stateless/00961_check_table.sql index a6abe8103d5..fc3c5435670 100644 --- a/tests/queries/0_stateless/00961_check_table.sql +++ b/tests/queries/0_stateless/00961_check_table.sql @@ -39,6 +39,6 @@ CHECK TABLE mt_table PARTITION 201902 SETTINGS max_threads = 1; SELECT '========'; -CHECK TABLE mt_table PART '201801_1_1_0'; +CHECK TABLE mt_table PART '201801_1_1_2'; DROP TABLE IF EXISTS mt_table; diff --git a/tests/queries/0_stateless/00964_bloom_index_string_functions.sh b/tests/queries/0_stateless/00964_bloom_index_string_functions.sh index e2ec7fd42e4..9e410f09b13 100755 --- a/tests/queries/0_stateless/00964_bloom_index_string_functions.sh +++ b/tests/queries/0_stateless/00964_bloom_index_string_functions.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS bloom_filter_idx;" # NGRAM BF -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" CREATE TABLE bloom_filter_idx ( k UInt64, diff --git a/tests/queries/0_stateless/00965_set_index_string_functions.sh b/tests/queries/0_stateless/00965_set_index_string_functions.sh index 8892fb11752..0f29c3dd2f2 100755 --- a/tests/queries/0_stateless/00965_set_index_string_functions.sh +++ b/tests/queries/0_stateless/00965_set_index_string_functions.sh @@ -6,7 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS set_idx;" -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" CREATE TABLE set_idx ( k UInt64, diff --git a/tests/queries/0_stateless/00974_primary_key_for_lowCardinality.sh b/tests/queries/0_stateless/00974_primary_key_for_lowCardinality.sh index 389d433c7e2..ba260042f47 100755 --- a/tests/queries/0_stateless/00974_primary_key_for_lowCardinality.sh +++ b/tests/queries/0_stateless/00974_primary_key_for_lowCardinality.sh @@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS lowString;" $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS string;" -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" create table lowString ( a LowCardinality(String), @@ -18,7 +18,7 @@ ENGINE = MergeTree() PARTITION BY toYYYYMM(b) ORDER BY (a)" -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" create table string ( a String, diff --git a/tests/queries/0_stateless/00974_query_profiler.sql b/tests/queries/0_stateless/00974_query_profiler.sql index cd2f65eb94a..1f90641726f 100644 --- a/tests/queries/0_stateless/00974_query_profiler.sql +++ b/tests/queries/0_stateless/00974_query_profiler.sql @@ -17,6 +17,7 @@ SELECT count() > 0 FROM system.trace_log t WHERE query_id = (SELECT query_id FRO SET query_profiler_real_time_period_ns = 0; SET query_profiler_cpu_time_period_ns = 1000000; SET log_queries = 1; +SET max_rows_to_read = 0; SELECT count(), ignore('test cpu time query profiler') FROM numbers_mt(10000000000); SET log_queries = 0; SYSTEM FLUSH LOGS; diff --git a/tests/queries/0_stateless/00976_max_execution_speed.sql b/tests/queries/0_stateless/00976_max_execution_speed.sql index 52c3f05ff43..41374712724 100644 --- a/tests/queries/0_stateless/00976_max_execution_speed.sql +++ b/tests/queries/0_stateless/00976_max_execution_speed.sql @@ -1,2 +1,2 @@ -SET max_execution_speed = 1, max_execution_time = 3; +SET max_execution_speed = 1, max_execution_time = 3, max_rows_to_read = 0; SELECT count() FROM system.numbers; -- { serverError TIMEOUT_EXCEEDED } diff --git a/tests/queries/0_stateless/00982_low_cardinality_setting_in_mv.sql b/tests/queries/0_stateless/00982_low_cardinality_setting_in_mv.sql index 7192642bcde..e545dec90b7 100644 --- a/tests/queries/0_stateless/00982_low_cardinality_setting_in_mv.sql +++ b/tests/queries/0_stateless/00982_low_cardinality_setting_in_mv.sql @@ -4,7 +4,7 @@ DROP TABLE IF EXISTS mat_view; CREATE TABLE test1 (a LowCardinality(String)) ENGINE=MergeTree() ORDER BY a; CREATE TABLE test2 (a UInt64) engine=MergeTree() ORDER BY a; -CREATE MATERIALIZED VIEW test_mv TO test2 AS SELECT toUInt64(a = 'test') FROM test1; +CREATE MATERIALIZED VIEW test_mv TO test2 AS SELECT toUInt64(a = 'test') AS a FROM test1; DROP TABLE test_mv; DROP TABLE test1; diff --git a/tests/queries/0_stateless/00989_parallel_parts_loading.sql b/tests/queries/0_stateless/00989_parallel_parts_loading.sql index 407e124f137..3b73e6a0e3c 100644 --- a/tests/queries/0_stateless/00989_parallel_parts_loading.sql +++ b/tests/queries/0_stateless/00989_parallel_parts_loading.sql @@ -1,3 +1,5 @@ +-- Tags: no-random-settings, no-random-merge-tree-settings +-- small number of insert threads can make insert terribly slow, especially with some build like msan DROP TABLE IF EXISTS mt; CREATE TABLE mt (x UInt64) ENGINE = MergeTree ORDER BY x SETTINGS parts_to_delay_insert = 100000, parts_to_throw_insert = 100000; diff --git a/tests/queries/0_stateless/00990_hasToken.sh b/tests/queries/0_stateless/00990_hasToken.sh index 6a1d4ff5ccf..d79472aa5a5 100755 --- a/tests/queries/0_stateless/00990_hasToken.sh +++ b/tests/queries/0_stateless/00990_hasToken.sh @@ -6,4 +6,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # We should have correct env vars from shell_config.sh to run this test -python3 "$CURDIR"/00990_hasToken.python | ${CLICKHOUSE_CLIENT} --max_query_size 1048576 -nm +python3 "$CURDIR"/00990_hasToken.python | ${CLICKHOUSE_CLIENT} --max_query_size 1048576 -m diff --git a/tests/queries/0_stateless/01010_pmj_right_table_memory_limits.sql b/tests/queries/0_stateless/01010_pmj_right_table_memory_limits.sql index a090be85221..b8f2596f3d5 100644 --- a/tests/queries/0_stateless/01010_pmj_right_table_memory_limits.sql +++ b/tests/queries/0_stateless/01010_pmj_right_table_memory_limits.sql @@ -1,5 +1,7 @@ -- Tags: no-parallel, no-fasttest, no-random-settings +SET max_bytes_in_join = 0; +SET max_rows_in_join = 0; SET max_memory_usage = 32000000; SET join_on_disk_max_files_to_merge = 4; diff --git a/tests/queries/0_stateless/01013_sync_replica_timeout_zookeeper.sh b/tests/queries/0_stateless/01013_sync_replica_timeout_zookeeper.sh index 55bbfb3ff11..54f1bbe29dc 100755 --- a/tests/queries/0_stateless/01013_sync_replica_timeout_zookeeper.sh +++ b/tests/queries/0_stateless/01013_sync_replica_timeout_zookeeper.sh @@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) R1=table_1013_1 R2=table_1013_2 -${CLICKHOUSE_CLIENT} -n -q " +${CLICKHOUSE_CLIENT} -q " DROP TABLE IF EXISTS $R1; DROP TABLE IF EXISTS $R2; @@ -19,13 +19,13 @@ ${CLICKHOUSE_CLIENT} -n -q " INSERT INTO $R1 VALUES (1) " -timeout 10s ${CLICKHOUSE_CLIENT} -n -q " +timeout 10s ${CLICKHOUSE_CLIENT} -q " SET receive_timeout=1; SYSTEM SYNC REPLICA $R2 " 2>&1 | grep -F -q "Code: 159. DB::Exception" && echo 'OK' || echo 'Failed!' # By dropping tables all related SYNC REPLICA queries would be terminated as well -${CLICKHOUSE_CLIENT} -n -q " +${CLICKHOUSE_CLIENT} -q " DROP TABLE IF EXISTS $R2; DROP TABLE IF EXISTS $R1; " diff --git a/tests/queries/0_stateless/01017_mutations_with_nondeterministic_functions_zookeeper.sh b/tests/queries/0_stateless/01017_mutations_with_nondeterministic_functions_zookeeper.sh index 4f35b69da0b..053fd9d9d49 100755 --- a/tests/queries/0_stateless/01017_mutations_with_nondeterministic_functions_zookeeper.sh +++ b/tests/queries/0_stateless/01017_mutations_with_nondeterministic_functions_zookeeper.sh @@ -9,7 +9,7 @@ R1=table_1017_1 R2=table_1017_2 T1=table_1017_merge -${CLICKHOUSE_CLIENT} -n -q " +${CLICKHOUSE_CLIENT} -q " DROP DICTIONARY IF EXISTS dict1; DROP TABLE IF EXISTS $R1; DROP TABLE IF EXISTS $R2; @@ -68,7 +68,7 @@ ${CLICKHOUSE_CLIENT} --query "ALTER TABLE $R1 DELETE WHERE dictHas('${CLICKHOUSE ${CLICKHOUSE_CLIENT} --query "ALTER TABLE $R1 DELETE WHERE dictHas('${CLICKHOUSE_DATABASE}.dict1', toUInt64(x))" --allow_nondeterministic_mutations=1 2>&1 \ && echo 'OK' || echo 'FAIL' -${CLICKHOUSE_CLIENT} -n -q " +${CLICKHOUSE_CLIENT} -q " DROP DICTIONARY IF EXISTS dict1; DROP TABLE IF EXISTS $R2; DROP TABLE IF EXISTS $R1; diff --git a/tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql b/tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql index c13a0859183..eca370d94af 100644 --- a/tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql +++ b/tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql @@ -7,7 +7,8 @@ -- sizeof(HLL) is (2^K * 6 / 8) -- hence max_memory_usage for 100 rows = (96<<10)*100 = 9830400 -SET use_uncompressed_cache = 0; +SET use_uncompressed_cache = 0; +SET memory_profiler_step = 1; -- HashTable for UInt32 (used until (1<<13) elements), hence 8192 elements SELECT 'UInt32'; @@ -31,14 +32,14 @@ SELECT 'K=16'; SELECT 'UInt32'; SET max_memory_usage = 2000000; SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(16)(number % 4096) u FROM numbers(4096 * 100) GROUP BY k); -- { serverError MEMORY_LIMIT_EXCEEDED } -SET max_memory_usage = 4915200; +SET max_memory_usage = 5230000; SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(16)(number % 4096) u FROM numbers(4096 * 100) GROUP BY k); -- HashTable for UInt64 (used until (1<<11) elements), hence 2048 elements SELECT 'UInt64'; SET max_memory_usage = 2000000; SELECT sum(u) FROM (SELECT intDiv(number, 2048) AS k, uniqCombined(16)(reinterpretAsString(number % 2048)) u FROM numbers(2048 * 100) GROUP BY k); -- { serverError MEMORY_LIMIT_EXCEEDED } -SET max_memory_usage = 4915200; +SET max_memory_usage = 5900000; SELECT sum(u) FROM (SELECT intDiv(number, 2048) AS k, uniqCombined(16)(reinterpretAsString(number % 2048)) u FROM numbers(2048 * 100) GROUP BY k); SELECT 'K=18'; diff --git a/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.sh b/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.sh index 9a26f78a8ee..5c67fe08fbf 100755 --- a/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.sh +++ b/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.sh @@ -14,7 +14,7 @@ declare -a SearchTypes=("POLYGON" "POLYGON_SIMPLE" "POLYGON_INDEX_EACH" "POLYGON tar -xf "${CURDIR}"/01037_test_data_search.tar.gz -C "${DATA_DIR}" -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" DROP TABLE IF EXISTS points; CREATE TABLE points (x Float64, y Float64) ENGINE = Memory; " @@ -23,7 +23,7 @@ $CLICKHOUSE_CLIENT --query="INSERT INTO points FORMAT TSV" --max_insert_block_si rm "${DATA_DIR}"/01037_point_data -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" DROP TABLE IF EXISTS polygons_array; CREATE TABLE polygons_array @@ -43,7 +43,7 @@ for type in "${SearchTypes[@]}"; do outputFile="${TMP_DIR}/results${type}.out" - $CLICKHOUSE_CLIENT -n --query=" + $CLICKHOUSE_CLIENT --query=" DROP DICTIONARY IF EXISTS dict_array; CREATE DICTIONARY dict_array diff --git a/tests/queries/0_stateless/01037_polygon_dicts_correctness_fast.sh b/tests/queries/0_stateless/01037_polygon_dicts_correctness_fast.sh index 47f7a5c1c4f..591978d1129 100755 --- a/tests/queries/0_stateless/01037_polygon_dicts_correctness_fast.sh +++ b/tests/queries/0_stateless/01037_polygon_dicts_correctness_fast.sh @@ -14,7 +14,7 @@ declare -a SearchTypes=("POLYGON_INDEX_EACH" "POLYGON_INDEX_CELL") tar -xf "${CURDIR}"/01037_test_data_perf.tar.gz -C "${DATA_DIR}" -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" CREATE TABLE points (x Float64, y Float64) ENGINE = Memory; " @@ -22,7 +22,7 @@ $CLICKHOUSE_CLIENT --query="INSERT INTO points FORMAT TSV" --min_chunk_bytes_for rm "${DATA_DIR}"/01037_point_data -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" DROP TABLE IF EXISTS polygons_array; CREATE TABLE polygons_array @@ -42,7 +42,7 @@ for type in "${SearchTypes[@]}"; do outputFile="${TMP_DIR}/results${type}.out" - $CLICKHOUSE_CLIENT -n --query=" + $CLICKHOUSE_CLIENT --query=" DROP DICTIONARY IF EXISTS dict_array; CREATE DICTIONARY dict_array diff --git a/tests/queries/0_stateless/01037_polygon_dicts_simple_functions.sh b/tests/queries/0_stateless/01037_polygon_dicts_simple_functions.sh index d1ee3f283bc..ac033ff4eb8 100755 --- a/tests/queries/0_stateless/01037_polygon_dicts_simple_functions.sh +++ b/tests/queries/0_stateless/01037_polygon_dicts_simple_functions.sh @@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) TMP_DIR=${CLICKHOUSE_TMP}/tmp mkdir -p $TMP_DIR -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" DROP TABLE IF EXISTS polygons_array; CREATE TABLE polygons_array (key Array(Array(Array(Array(Float64)))), name String, value UInt64) ENGINE = Memory; @@ -53,7 +53,7 @@ for type in "${SearchTypes[@]}"; do outputFile="${TMP_DIR}/results${type}.out" - $CLICKHOUSE_CLIENT -n --query=" + $CLICKHOUSE_CLIENT --query=" DROP DICTIONARY IF EXISTS dict_array; CREATE DICTIONARY dict_array ( @@ -106,7 +106,7 @@ do diff -q "${CURDIR}/01037_polygon_dicts_simple_functions.ans" "$outputFile" done -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" DROP DICTIONARY dict_array; DROP DICTIONARY dict_tuple; DROP TABLE polygons_array; diff --git a/tests/queries/0_stateless/01049_window_view_window_functions.reference b/tests/queries/0_stateless/01049_window_view_window_functions.reference index 2d49664b280..073301104d2 100644 --- a/tests/queries/0_stateless/01049_window_view_window_functions.reference +++ b/tests/queries/0_stateless/01049_window_view_window_functions.reference @@ -67,3 +67,7 @@ SELECT toDateTime(hopEnd(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 2020-01-10 00:00:00 SELECT hopEnd(hop(toDateTime('2019-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa')); 2019-01-10 00:00:00 +SELECT hopStart(tuple()); -- { serverError ILLEGAL_COLUMN } +SELECT hopEnd(tuple()); -- { serverError ILLEGAL_COLUMN } +SELECT tumbleStart(tuple()); -- { serverError ILLEGAL_COLUMN } +SELECT tumbleEnd(tuple()); -- { serverError ILLEGAL_COLUMN } diff --git a/tests/queries/0_stateless/01049_window_view_window_functions.sql b/tests/queries/0_stateless/01049_window_view_window_functions.sql index 617019bd2c6..fb2b4b4949a 100644 --- a/tests/queries/0_stateless/01049_window_view_window_functions.sql +++ b/tests/queries/0_stateless/01049_window_view_window_functions.sql @@ -36,3 +36,8 @@ SELECT hopEnd(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, I SELECT toDateTime(hopEnd(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'), 'US/Samoa'); SELECT toDateTime(hopEnd(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'), 'US/Samoa'); SELECT hopEnd(hop(toDateTime('2019-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa')); + +SELECT hopStart(tuple()); -- { serverError ILLEGAL_COLUMN } +SELECT hopEnd(tuple()); -- { serverError ILLEGAL_COLUMN } +SELECT tumbleStart(tuple()); -- { serverError ILLEGAL_COLUMN } +SELECT tumbleEnd(tuple()); -- { serverError ILLEGAL_COLUMN } diff --git a/tests/queries/0_stateless/01055_minmax_index_compact_parts.sh b/tests/queries/0_stateless/01055_minmax_index_compact_parts.sh index 0b14ef8f6fa..29ce4da02ed 100755 --- a/tests/queries/0_stateless/01055_minmax_index_compact_parts.sh +++ b/tests/queries/0_stateless/01055_minmax_index_compact_parts.sh @@ -10,7 +10,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS minmax_idx;" -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" CREATE TABLE minmax_idx ( u64 UInt64, diff --git a/tests/queries/0_stateless/01077_mutations_index_consistency.sh b/tests/queries/0_stateless/01077_mutations_index_consistency.sh index ffbe3692b64..f103692de56 100755 --- a/tests/queries/0_stateless/01077_mutations_index_consistency.sh +++ b/tests/queries/0_stateless/01077_mutations_index_consistency.sh @@ -7,13 +7,13 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS movement" -$CLICKHOUSE_CLIENT -n --query "CREATE TABLE movement (date DateTime('Asia/Istanbul')) Engine = MergeTree ORDER BY (toStartOfHour(date)) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';" +$CLICKHOUSE_CLIENT --query "CREATE TABLE movement (date DateTime('Asia/Istanbul')) Engine = MergeTree ORDER BY (toStartOfHour(date)) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';" $CLICKHOUSE_CLIENT --query "insert into movement select toDateTime('2020-01-22 00:00:00', 'Asia/Istanbul') + number%(23*3600) from numbers(1000000);" $CLICKHOUSE_CLIENT --query "OPTIMIZE TABLE movement FINAL" -$CLICKHOUSE_CLIENT -n --query " +$CLICKHOUSE_CLIENT --query " SELECT count(), toStartOfHour(date) AS Hour @@ -26,7 +26,7 @@ ORDER BY Hour DESC $CLICKHOUSE_CLIENT --query "alter table movement delete where date >= toDateTime('2020-01-22T16:00:00', 'Asia/Istanbul') and date < toDateTime('2020-01-22T17:00:00', 'Asia/Istanbul') SETTINGS mutations_sync = 2" -$CLICKHOUSE_CLIENT -n --query " +$CLICKHOUSE_CLIENT --query " SELECT count(), toStartOfHour(date) AS Hour @@ -37,7 +37,7 @@ ORDER BY Hour DESC " | grep "16:00:00" | wc -l -$CLICKHOUSE_CLIENT -n --query " +$CLICKHOUSE_CLIENT --query " SELECT count(), toStartOfHour(date) AS Hour @@ -48,7 +48,7 @@ ORDER BY Hour DESC " | grep "22:00:00" | cut -f1 -$CLICKHOUSE_CLIENT -n --query " +$CLICKHOUSE_CLIENT --query " SELECT count(), toStartOfHour(date) AS Hour diff --git a/tests/queries/0_stateless/01079_bad_alters_zookeeper_long.sh b/tests/queries/0_stateless/01079_bad_alters_zookeeper_long.sh index 82b8be65af5..39e65af039b 100755 --- a/tests/queries/0_stateless/01079_bad_alters_zookeeper_long.sh +++ b/tests/queries/0_stateless/01079_bad_alters_zookeeper_long.sh @@ -26,6 +26,10 @@ while [[ $($CLICKHOUSE_CLIENT --query "KILL MUTATION WHERE mutation_id='00000000 sleep 1 done +while [[ $($CLICKHOUSE_CLIENT --query "SELECT * FROM system.replication_queue WHERE type='ALTER_METADATA' AND database = '$CLICKHOUSE_DATABASE'" 2>&1) ]]; do + sleep 1 +done + $CLICKHOUSE_CLIENT --query "SHOW CREATE TABLE table_for_bad_alters;" # Type changed, but we can revert back $CLICKHOUSE_CLIENT --query "INSERT INTO table_for_bad_alters VALUES(2, 2, 7)" diff --git a/tests/queries/0_stateless/01086_window_view_cleanup.sh b/tests/queries/0_stateless/01086_window_view_cleanup.sh index 8b8e794c8ff..2e6cc7e2520 100755 --- a/tests/queries/0_stateless/01086_window_view_cleanup.sh +++ b/tests/queries/0_stateless/01086_window_view_cleanup.sh @@ -13,7 +13,8 @@ opts=( DATABASE_ORDINARY="${CLICKHOUSE_DATABASE}_ordinary" -$CLICKHOUSE_CLIENT "${opts[@]}" --allow_deprecated_database_ordinary=1 --multiquery " +$CLICKHOUSE_CLIENT "${opts[@]}" --query " + SET allow_deprecated_database_ordinary = 1; SET allow_experimental_window_view = 1; SET window_view_clean_interval = 1; @@ -28,8 +29,7 @@ $CLICKHOUSE_CLIENT "${opts[@]}" --allow_deprecated_database_ordinary=1 --multiqu INSERT INTO ${DATABASE_ORDINARY}.mt VALUES (1, 2, toDateTime('1990/01/01 12:00:01', 'US/Samoa')); INSERT INTO ${DATABASE_ORDINARY}.mt VALUES (1, 3, toDateTime('1990/01/01 12:00:02', 'US/Samoa')); INSERT INTO ${DATABASE_ORDINARY}.mt VALUES (1, 4, toDateTime('1990/01/01 12:00:05', 'US/Samoa')); - INSERT INTO ${DATABASE_ORDINARY}.mt VALUES (1, 5, toDateTime('1990/01/01 12:00:06', 'US/Samoa')); -" + INSERT INTO ${DATABASE_ORDINARY}.mt VALUES (1, 5, toDateTime('1990/01/01 12:00:06', 'US/Samoa'));" while true; do $CLICKHOUSE_CLIENT "${opts[@]}" --query="SELECT count(*) FROM ${DATABASE_ORDINARY}.\`.inner.wv\`" | grep -q "5" && break || sleep .5 ||: diff --git a/tests/queries/0_stateless/01091_query_profiler_does_not_hang.sql b/tests/queries/0_stateless/01091_query_profiler_does_not_hang.sql index 21a84bdd691..45f1a00ae23 100644 --- a/tests/queries/0_stateless/01091_query_profiler_does_not_hang.sql +++ b/tests/queries/0_stateless/01091_query_profiler_does_not_hang.sql @@ -1,4 +1,4 @@ -- Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug -SET query_profiler_cpu_time_period_ns = 1; +SET query_profiler_cpu_time_period_ns = 1, max_rows_to_read = 0; SELECT count() FROM numbers_mt(1000000000); diff --git a/tests/queries/0_stateless/01119_optimize_trivial_insert_select.sql b/tests/queries/0_stateless/01119_optimize_trivial_insert_select.sql index a53b60a5ad3..2b301d7aced 100644 --- a/tests/queries/0_stateless/01119_optimize_trivial_insert_select.sql +++ b/tests/queries/0_stateless/01119_optimize_trivial_insert_select.sql @@ -1,8 +1,9 @@ drop table if exists t; create table t(n int, a Int64, s String) engine = MergeTree() order by a; -set enable_positional_arguments=0; -set optimize_trivial_insert_select=1; +set enable_positional_arguments = 0; +set optimize_trivial_insert_select = 1; +set max_rows_to_read = 0; -- due to aggregate functions, optimize_trivial_insert_select will not be applied insert into t select 1, sum(number) as c, getSetting('max_threads') from numbers_mt(100000000) settings max_insert_threads=4, max_threads=2; diff --git a/tests/queries/0_stateless/01119_session_log.sql b/tests/queries/0_stateless/01119_session_log.sh old mode 100644 new mode 100755 similarity index 74% rename from tests/queries/0_stateless/01119_session_log.sql rename to tests/queries/0_stateless/01119_session_log.sh index 55f6228797a..2d17b545276 --- a/tests/queries/0_stateless/01119_session_log.sql +++ b/tests/queries/0_stateless/01119_session_log.sh @@ -1,5 +1,20 @@ --- Tags: no-fasttest +#!/usr/bin/env bash +# Tags: no-fasttest +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +session_log_query_prefix=" +system flush logs; +select distinct type, user, auth_type, toString(client_address)!='::ffff:0.0.0.0' as a, client_port!=0 as b, interface from system.session_log +where user in ('default', 'nonexistsnt_user_1119', ' ', ' INTERSERVER SECRET ') +and interface in ('HTTP', 'TCP', 'TCP_Interserver') +and (user != 'default' or (a=1 and b=1)) -- FIXME: we should not write uninitialized address and port (but we do sometimes) +and event_time >= now() - interval 5 minute" + +$CLICKHOUSE_CLIENT -nm -q " select * from remote('127.0.0.2', system, one, 'default', ''); select * from remote('127.0.0.2', system, one, 'default', 'wrong password'); -- { serverError AUTHENTICATION_FAILED } select * from remote('127.0.0.2', system, one, 'nonexistsnt_user_1119', ''); -- { serverError AUTHENTICATION_FAILED } @@ -16,9 +31,17 @@ select * from url('http://127.0.0.1:8123/?query=select+1&user=+++', LineAsString select * from cluster('test_cluster_interserver_secret', system, one); -system flush logs; -select distinct type, user, auth_type, toString(client_address)!='::ffff:0.0.0.0' as a, client_port!=0 as b, interface from system.session_log -where user in ('default', 'nonexistsnt_user_1119', ' ', ' INTERSERVER SECRET ') -and interface in ('HTTP', 'TCP', 'TCP_Interserver') -and (user != 'default' or (a=1 and b=1)) -- FIXME: we should not write uninitialized address and port (but we do sometimes) -and event_time >= now() - interval 5 minute order by type, user, interface; +$session_log_query_prefix and type != 'Logout' order by type, user, interface; +" + +# Wait for logout events. +for _ in {1..10} +do + if [ "`$CLICKHOUSE_CLIENT -q "$session_log_query_prefix and type = 'Logout'" | wc -l`" -eq 3 ] + then + break + fi + sleep 2 +done + +$CLICKHOUSE_CLIENT -q "$session_log_query_prefix and type = 'Logout' order by user, interface" diff --git a/tests/queries/0_stateless/01160_table_dependencies.sh b/tests/queries/0_stateless/01160_table_dependencies.sh index acb6522e9e2..b72acf62610 100755 --- a/tests/queries/0_stateless/01160_table_dependencies.sh +++ b/tests/queries/0_stateless/01160_table_dependencies.sh @@ -35,7 +35,7 @@ arraySort(loading_dependencies_table), arraySort(loading_dependent_table) from s $CLICKHOUSE_CLIENT -q "select '====='" $CLICKHOUSE_CLIENT -q "alter table t add column x int default in(1, $CLICKHOUSE_DATABASE.s), drop column y" -$CLICKHOUSE_CLIENT -q "create materialized view mv to s as select n from t where n in (select n from join)" +$CLICKHOUSE_CLIENT -q "create materialized view mv to s as select n as x from t where n in (select n from join)" $CLICKHOUSE_CLIENT -q "select table, arraySort(dependencies_table), arraySort(loading_dependencies_table), arraySort(loading_dependent_table) from system.tables where database=currentDatabase() order by table" diff --git a/tests/queries/0_stateless/01161_all_system_tables.sh b/tests/queries/0_stateless/01161_all_system_tables.sh index 739df782a39..d4a80d074dc 100755 --- a/tests/queries/0_stateless/01161_all_system_tables.sh +++ b/tests/queries/0_stateless/01161_all_system_tables.sh @@ -19,7 +19,7 @@ function run_selects() thread_num=$1 readarray -t tables_arr < <(${CLICKHOUSE_CLIENT} -q "SELECT database || '.' || name FROM system.tables WHERE database in ('system', 'information_schema', 'INFORMATION_SCHEMA') and name != 'zookeeper' and name != 'models' - AND sipHash64(name || toString($RAND)) % $THREADS = $thread_num AND name NOT LIKE '%\\_sender' AND name NOT LIKE '%\\_watcher'") + AND sipHash64(name || toString($RAND)) % $THREADS = $thread_num AND name NOT LIKE '%\\_sender' AND name NOT LIKE '%\\_watcher' AND name != 'coverage_log'") for t in "${tables_arr[@]}" do diff --git a/tests/queries/0_stateless/01172_transaction_counters.reference b/tests/queries/0_stateless/01172_transaction_counters.reference index 24083d7d40b..0fd73c7bcec 100644 --- a/tests/queries/0_stateless/01172_transaction_counters.reference +++ b/tests/queries/0_stateless/01172_transaction_counters.reference @@ -16,25 +16,25 @@ 7 all_3_3_0 (0,0,'00000000-0000-0000-0000-000000000000') 0 7 all_4_4_0 (0,0,'00000000-0000-0000-0000-000000000000') 0 8 1 -1 1 AddPart 1 1 1 1 all_1_1_0 -2 1 Begin 1 1 1 1 -2 1 AddPart 1 1 1 1 all_2_2_0 -2 1 Rollback 1 1 1 1 -3 1 Begin 1 1 1 1 -3 1 AddPart 1 1 1 1 all_3_3_0 -3 1 Commit 1 1 1 0 -1 1 LockPart 1 1 1 1 all_2_2_0 -4 1 Begin 1 1 1 1 -4 1 AddPart 1 1 1 1 all_4_4_0 -4 1 Commit 1 1 1 0 -5 1 Begin 1 1 1 1 -5 1 AddPart 1 1 1 1 all_5_5_0 -5 1 LockPart 1 1 1 1 all_1_1_0 -5 1 LockPart 1 1 1 1 all_3_3_0 -5 1 LockPart 1 1 1 1 all_4_4_0 -5 1 LockPart 1 1 1 1 all_5_5_0 -5 1 UnlockPart 1 1 1 1 all_1_1_0 -5 1 UnlockPart 1 1 1 1 all_3_3_0 -5 1 UnlockPart 1 1 1 1 all_4_4_0 -5 1 UnlockPart 1 1 1 1 all_5_5_0 -5 1 Rollback 1 1 1 1 +1 AddPart 1 1 1 1 all_1_1_0 +2 Begin 1 1 1 1 +2 AddPart 1 1 1 1 all_2_2_0 +2 Rollback 1 1 1 1 +3 Begin 1 1 1 1 +3 AddPart 1 1 1 1 all_3_3_0 +3 Commit 1 1 1 0 +1 LockPart 1 1 1 1 all_2_2_0 +4 Begin 1 1 1 1 +4 AddPart 1 1 1 1 all_4_4_0 +4 Commit 1 1 1 0 +5 Begin 1 1 1 1 +5 AddPart 1 1 1 1 all_5_5_0 +5 LockPart 1 1 1 1 all_1_1_0 +5 LockPart 1 1 1 1 all_3_3_0 +5 LockPart 1 1 1 1 all_4_4_0 +5 LockPart 1 1 1 1 all_5_5_0 +5 UnlockPart 1 1 1 1 all_1_1_0 +5 UnlockPart 1 1 1 1 all_3_3_0 +5 UnlockPart 1 1 1 1 all_4_4_0 +5 UnlockPart 1 1 1 1 all_5_5_0 +5 Rollback 1 1 1 1 diff --git a/tests/queries/0_stateless/01172_transaction_counters.sql b/tests/queries/0_stateless/01172_transaction_counters.sql index a809e4196e9..581b45cd15c 100644 --- a/tests/queries/0_stateless/01172_transaction_counters.sql +++ b/tests/queries/0_stateless/01172_transaction_counters.sql @@ -42,7 +42,6 @@ rollback; system flush logs; select indexOf((select arraySort(groupUniqArray(tid)) from system.transactions_info_log where database=currentDatabase() and table='txn_counters'), tid), - (toDecimal64(now64(6), 6) - toDecimal64(event_time, 6)) < 100, type, thread_id!=0, length(query_id)=length(queryID()) or type='Commit' and query_id='', -- ignore fault injection after commit diff --git a/tests/queries/0_stateless/01187_set_profile_as_setting.sh b/tests/queries/0_stateless/01187_set_profile_as_setting.sh index 42f596c45d6..f6c6fd0be34 100755 --- a/tests/queries/0_stateless/01187_set_profile_as_setting.sh +++ b/tests/queries/0_stateless/01187_set_profile_as_setting.sh @@ -7,11 +7,11 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -n -m -q "select value, changed from system.settings where name='readonly';" -$CLICKHOUSE_CLIENT -n -m -q "set profile='default'; select value, changed from system.settings where name='readonly';" -$CLICKHOUSE_CLIENT -n -m -q "set profile='readonly'; select value, changed from system.settings where name='readonly';" 2>&1| grep -Fa "Cannot modify 'send_logs_level' setting in readonly mode" > /dev/null && echo "OK" +$CLICKHOUSE_CLIENT -m -q "select value, changed from system.settings where name='readonly';" +$CLICKHOUSE_CLIENT -m -q "set profile='default'; select value, changed from system.settings where name='readonly';" +$CLICKHOUSE_CLIENT -m -q "set profile='readonly'; select value, changed from system.settings where name='readonly';" 2>&1| grep -Fa "Cannot modify 'send_logs_level' setting in readonly mode" > /dev/null && echo "OK" CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=fatal/g') -$CLICKHOUSE_CLIENT -n -m -q "set profile='readonly'; select value, changed from system.settings where name='readonly';" +$CLICKHOUSE_CLIENT -m -q "set profile='readonly'; select value, changed from system.settings where name='readonly';" ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=select+value,changed+from+system.settings+where+name='readonly'" ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&profile=default&query=select+value,changed+from+system.settings+where+name='readonly'" diff --git a/tests/queries/0_stateless/01191_rename_dictionary.sql b/tests/queries/0_stateless/01191_rename_dictionary.sql index c5012dabc81..be95e5a7d4b 100644 --- a/tests/queries/0_stateless/01191_rename_dictionary.sql +++ b/tests/queries/0_stateless/01191_rename_dictionary.sql @@ -27,6 +27,7 @@ RENAME DICTIONARY test_01191.t TO test_01191.dict1; -- {serverError INCORRECT_QU DROP DICTIONARY test_01191.t; -- {serverError INCORRECT_QUERY} DROP TABLE test_01191.t; +DROP DATABASE IF EXISTS dummy_db; CREATE DATABASE dummy_db ENGINE=Atomic; RENAME DICTIONARY test_01191.dict TO dummy_db.dict1; RENAME DICTIONARY dummy_db.dict1 TO test_01191.dict; diff --git a/tests/queries/0_stateless/01245_limit_infinite_sources.sql b/tests/queries/0_stateless/01245_limit_infinite_sources.sql index 05680d86a33..69c93baf8a8 100644 --- a/tests/queries/0_stateless/01245_limit_infinite_sources.sql +++ b/tests/queries/0_stateless/01245_limit_infinite_sources.sql @@ -9,3 +9,4 @@ FROM ) WHERE number = 1 LIMIT 1 +SETTINGS max_rows_to_read = 0; diff --git a/tests/queries/0_stateless/01246_buffer_flush.sh b/tests/queries/0_stateless/01246_buffer_flush.sh index aea91a0bf6b..3c7b9038e1f 100755 --- a/tests/queries/0_stateless/01246_buffer_flush.sh +++ b/tests/queries/0_stateless/01246_buffer_flush.sh @@ -27,7 +27,7 @@ function wait_until() function get_buffer_delay() { local buffer_insert_id=$1 && shift - query "SYSTEM FLUSH LOGS" + $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" query " WITH (SELECT event_time_microseconds FROM system.query_log WHERE current_database = '$CLICKHOUSE_DATABASE' AND type = 'QueryStart' AND query_id = '$buffer_insert_id') AS begin_, diff --git a/tests/queries/0_stateless/01249_flush_interactive.sh b/tests/queries/0_stateless/01249_flush_interactive.sh index 551e11c8c8d..775b7825a16 100755 --- a/tests/queries/0_stateless/01249_flush_interactive.sh +++ b/tests/queries/0_stateless/01249_flush_interactive.sh @@ -14,10 +14,10 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) function test() { - timeout 5 ${CLICKHOUSE_LOCAL} --max_execution_time 10 --query " + timeout 5 ${CLICKHOUSE_LOCAL} --max_execution_time 10 --max_rows_to_read 0 --query " SELECT DISTINCT number % 5 FROM system.numbers" ||: echo -e '---' - timeout 5 ${CLICKHOUSE_CURL} -sS --no-buffer "${CLICKHOUSE_URL}&max_execution_time=10" --data-binary " + timeout 5 ${CLICKHOUSE_CURL} -sS --no-buffer "${CLICKHOUSE_URL}&max_execution_time=10&max_rows_to_read=0" --data-binary " SELECT DISTINCT number % 5 FROM system.numbers" ||: echo -e '---' } diff --git a/tests/queries/0_stateless/01278_min_insert_block_size_rows_for_materialized_views.sh b/tests/queries/0_stateless/01278_min_insert_block_size_rows_for_materialized_views.sh index 5f82731c54e..1ec53399958 100755 --- a/tests/queries/0_stateless/01278_min_insert_block_size_rows_for_materialized_views.sh +++ b/tests/queries/0_stateless/01278_min_insert_block_size_rows_for_materialized_views.sh @@ -10,7 +10,7 @@ set -o pipefail # shellcheck disable=SC2120 function execute() { - ${CLICKHOUSE_CLIENT} -n "$@" + ${CLICKHOUSE_CLIENT} "$@" } # diff --git a/tests/queries/0_stateless/01278_random_string_utf8.sql b/tests/queries/0_stateless/01278_random_string_utf8.sql index da2dc48c3e1..290d6a0c759 100644 --- a/tests/queries/0_stateless/01278_random_string_utf8.sql +++ b/tests/queries/0_stateless/01278_random_string_utf8.sql @@ -1,3 +1,6 @@ +-- Tags: no-fasttest +-- no-fasttest: upper/lowerUTF8 use ICU + SELECT randomStringUTF8('string'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT lengthUTF8(randomStringUTF8(100)); SELECT toTypeName(randomStringUTF8(10)); diff --git a/tests/queries/0_stateless/01293_show_settings.reference b/tests/queries/0_stateless/01293_show_settings.reference index 187f55697e4..c4c3473ee18 100644 --- a/tests/queries/0_stateless/01293_show_settings.reference +++ b/tests/queries/0_stateless/01293_show_settings.reference @@ -5,5 +5,7 @@ connect_timeout_with_failover_secure_ms Milliseconds 3000 external_storage_connect_timeout_sec UInt64 10 s3_connect_timeout_ms UInt64 1000 filesystem_prefetch_max_memory_usage UInt64 1073741824 +max_memory_usage UInt64 5000000000 +max_memory_usage_for_user UInt64 32000000000 max_untracked_memory UInt64 1048576 memory_profiler_step UInt64 1048576 diff --git a/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.reference b/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.reference index b20e7415f52..6282bf366d0 100644 --- a/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.reference +++ b/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.reference @@ -1,2 +1,2 @@ -Memory limit (for query) exceeded +Memory limit exceeded Ok diff --git a/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh b/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh index d74092d828d..5b7cba77432 100755 --- a/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh +++ b/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh @@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) start=$SECONDS # If the memory leak exists, it will lead to OOM fairly quickly. for _ in {1..1000}; do - $CLICKHOUSE_CLIENT --max_memory_usage 1G <<< "SELECT uniqExactState(number) FROM system.numbers_mt GROUP BY number % 10"; + $CLICKHOUSE_CLIENT --max_memory_usage 1G --max_rows_to_read 0 <<< "SELECT uniqExactState(number) FROM system.numbers_mt GROUP BY number % 10"; # NOTE: we cannot use timeout here since this will not guarantee that the query will be executed at least once. # (since graceful wait of clickhouse-client had been reverted) @@ -16,5 +16,5 @@ for _ in {1..1000}; do if [[ $elapsed -gt 30 ]]; then break fi -done 2>&1 | grep -o -F 'Memory limit (for query) exceeded' | uniq +done 2>&1 | grep -o -P 'Memory limit .+ exceeded' | sed -r -e 's/(Memory limit)(.+)( exceeded)/\1\3/' | uniq echo 'Ok' diff --git a/tests/queries/0_stateless/01304_direct_io_long.sh b/tests/queries/0_stateless/01304_direct_io_long.sh index 1241f299d94..867c37667fe 100755 --- a/tests/queries/0_stateless/01304_direct_io_long.sh +++ b/tests/queries/0_stateless/01304_direct_io_long.sh @@ -1,18 +1,19 @@ #!/usr/bin/env bash -# Tags: long, no-object-storage-with-slow-build +# Tags: long, no-object-storage-with-slow-build, no-flaky-check +# It can be too long with ThreadFuzzer CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT --query " +$CLICKHOUSE_CLIENT --max_rows_to_read 50M --query " DROP TABLE IF EXISTS bug; CREATE TABLE bug (UserID UInt64, Date Date) ENGINE = MergeTree ORDER BY Date SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi', merge_max_block_size = 8192; INSERT INTO bug SELECT rand64(), '2020-06-07' FROM numbers(50000000); OPTIMIZE TABLE bug FINAL;" LOG="$CLICKHOUSE_TMP/err-$CLICKHOUSE_DATABASE" -$CLICKHOUSE_BENCHMARK --iterations 10 --max_threads 100 --min_bytes_to_use_direct_io 1 <<< "SELECT sum(UserID) FROM bug PREWHERE NOT ignore(Date)" 1>/dev/null 2>"$LOG" +$CLICKHOUSE_BENCHMARK --max_rows_to_read 51M --iterations 10 --max_threads 100 --min_bytes_to_use_direct_io 1 <<< "SELECT sum(UserID) FROM bug PREWHERE NOT ignore(Date)" 1>/dev/null 2>"$LOG" cat "$LOG" | grep Exception cat "$LOG" | grep Loaded diff --git a/tests/queries/0_stateless/01307_multiple_leaders_zookeeper.sh b/tests/queries/0_stateless/01307_multiple_leaders_zookeeper.sh index db986e74b6b..02aa0f76be5 100755 --- a/tests/queries/0_stateless/01307_multiple_leaders_zookeeper.sh +++ b/tests/queries/0_stateless/01307_multiple_leaders_zookeeper.sh @@ -12,8 +12,8 @@ DATA_SIZE=200 SEQ=$(seq 0 $(($NUM_REPLICAS - 1))) -for REPLICA in $SEQ; do $CLICKHOUSE_CLIENT -n --query "DROP TABLE IF EXISTS r$REPLICA"; done -for REPLICA in $SEQ; do $CLICKHOUSE_CLIENT -n --query "CREATE TABLE r$REPLICA (x UInt64) ENGINE = ReplicatedMergeTree('/test/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/table', 'r$REPLICA') ORDER BY x SETTINGS min_bytes_for_wide_part = '10M';"; done +for REPLICA in $SEQ; do $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS r$REPLICA"; done +for REPLICA in $SEQ; do $CLICKHOUSE_CLIENT --query "CREATE TABLE r$REPLICA (x UInt64) ENGINE = ReplicatedMergeTree('/test/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/table', 'r$REPLICA') ORDER BY x SETTINGS min_bytes_for_wide_part = '10M';"; done function thread() { @@ -30,6 +30,6 @@ done wait -for REPLICA in $SEQ; do $CLICKHOUSE_CLIENT -n --query "SYSTEM SYNC REPLICA r$REPLICA"; done -for REPLICA in $SEQ; do $CLICKHOUSE_CLIENT -n --query "SELECT count(), sum(x) FROM r$REPLICA"; done -for REPLICA in $SEQ; do $CLICKHOUSE_CLIENT -n --query "DROP TABLE r$REPLICA"; done +for REPLICA in $SEQ; do $CLICKHOUSE_CLIENT --query "SYSTEM SYNC REPLICA r$REPLICA"; done +for REPLICA in $SEQ; do $CLICKHOUSE_CLIENT --query "SELECT count(), sum(x) FROM r$REPLICA"; done +for REPLICA in $SEQ; do $CLICKHOUSE_CLIENT --query "DROP TABLE r$REPLICA"; done diff --git a/tests/queries/0_stateless/01395_limit_more_cases.reference b/tests/queries/0_stateless/01395_limit_more_cases.reference index c9d0dd73ab8..d68b987ea19 100644 --- a/tests/queries/0_stateless/01395_limit_more_cases.reference +++ b/tests/queries/0_stateless/01395_limit_more_cases.reference @@ -254,4 +254,3 @@ 15 13 0 0 0 0 0 0 15 14 0 0 0 0 0 0 15 15 0 0 0 0 0 0 -0 0 0 diff --git a/tests/queries/0_stateless/01395_limit_more_cases.sh b/tests/queries/0_stateless/01395_limit_more_cases.sh index 177147d2142..9709bd74f26 100755 --- a/tests/queries/0_stateless/01395_limit_more_cases.sh +++ b/tests/queries/0_stateless/01395_limit_more_cases.sh @@ -9,8 +9,11 @@ SIZE=13 for OFFSET in {0..15}; do for LIMIT in {0..15}; do echo "SELECT - $OFFSET, $LIMIT, - count() AS c, min(number) AS first, max(number) AS last, + $OFFSET, + $LIMIT, + count() AS c, + min(number) AS first, + max(number) AS last, throwIf(first != ($OFFSET < $SIZE AND $LIMIT > 0 ? $OFFSET : 0)), throwIf(last != ($OFFSET < $SIZE AND $LIMIT > 0 ? least($SIZE - 1, $OFFSET + $LIMIT - 1) : 0)), throwIf((c != 0 OR first != 0 OR last != 0) AND (c != last - first + 1)) @@ -18,20 +21,3 @@ for OFFSET in {0..15}; do " done done | $CLICKHOUSE_CLIENT -n --max_block_size 5 - -# Randomized test - -ITERATIONS=1000 -for _ in $(seq $ITERATIONS); do - SIZE=$(($RANDOM % 100)) - OFFSET=$(($RANDOM % 111)) - LIMIT=$(($RANDOM % 111)) - - echo "WITH count() AS c, min(number) AS first, max(number) AS last - SELECT - throwIf(first != ($OFFSET < $SIZE AND $LIMIT > 0 ? $OFFSET : 0)), - throwIf(last != ($OFFSET < $SIZE AND $LIMIT > 0 ? least($SIZE - 1, $OFFSET + $LIMIT - 1) : 0)), - throwIf((c != 0 OR first != 0 OR last != 0) AND (c != last - first + 1)) - FROM (SELECT * FROM numbers($SIZE) LIMIT $OFFSET, $LIMIT); - " -done | $CLICKHOUSE_CLIENT -n --max_block_size $(($RANDOM % 20 + 1)) | uniq diff --git a/tests/queries/0_stateless/01395_limit_more_cases_random.reference b/tests/queries/0_stateless/01395_limit_more_cases_random.reference new file mode 100644 index 00000000000..06b63ea6c2f --- /dev/null +++ b/tests/queries/0_stateless/01395_limit_more_cases_random.reference @@ -0,0 +1 @@ +0 0 0 diff --git a/tests/queries/0_stateless/01395_limit_more_cases_random.sh b/tests/queries/0_stateless/01395_limit_more_cases_random.sh new file mode 100755 index 00000000000..c2f6b060aab --- /dev/null +++ b/tests/queries/0_stateless/01395_limit_more_cases_random.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +SIZE=13 +ITERATIONS=300 +for _ in $(seq $ITERATIONS); do + SIZE=$(($RANDOM % 100)) + OFFSET=$(($RANDOM % 111)) + LIMIT=$(($RANDOM % 111)) + + echo "WITH count() AS c, min(number) AS first, max(number) AS last + SELECT + throwIf(first != ($OFFSET < $SIZE AND $LIMIT > 0 ? $OFFSET : 0)), + throwIf(last != ($OFFSET < $SIZE AND $LIMIT > 0 ? least($SIZE - 1, $OFFSET + $LIMIT - 1) : 0)), + throwIf((c != 0 OR first != 0 OR last != 0) AND (c != last - first + 1)) + FROM (SELECT * FROM numbers($SIZE) LIMIT $OFFSET, $LIMIT); + " +done | $CLICKHOUSE_CLIENT -n --max_block_size $(($RANDOM % 20 + 1)) | uniq diff --git a/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh b/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh index bff85b3e29f..eca2db359bb 100755 --- a/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh +++ b/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh @@ -13,7 +13,7 @@ REPLICA=$($CLICKHOUSE_CLIENT --query "Select getMacro('replica')") SCALE=1000 -$CLICKHOUSE_CLIENT -n --query " +$CLICKHOUSE_CLIENT --query " DROP TABLE IF EXISTS r1; DROP TABLE IF EXISTS r2; CREATE TABLE r1 (x UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/{shard}', '1{replica}') ORDER BY x @@ -23,11 +23,11 @@ $CLICKHOUSE_CLIENT -n --query " DETACH TABLE r2; " -$CLICKHOUSE_CLIENT --max_block_size 1 --min_insert_block_size_rows 1 --min_insert_block_size_bytes 1 --max_insert_threads 16 --query "INSERT INTO r1 SELECT * FROM numbers_mt(${SCALE})" +# insert_keeper_fault_injection_probability=0 -- can slowdown insert a lot (produce a lot of parts) +$CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 --max_block_size 1 --min_insert_block_size_rows 1 --min_insert_block_size_bytes 1 --max_insert_threads 16 --query "INSERT INTO r1 SELECT * FROM numbers_mt(${SCALE})" # Now wait for cleanup thread - for _ in {1..60}; do $CLICKHOUSE_CLIENT --query "SYSTEM FLUSH LOGS" [[ $($CLICKHOUSE_CLIENT --query "SELECT sum(toUInt32(extract(message, 'Removed (\d+) old log entries'))) FROM system.text_log WHERE event_date >= yesterday() AND logger_name LIKE '%' || '$CLICKHOUSE_DATABASE' || '%r1%(ReplicatedMergeTreeCleanupThread)%' AND message LIKE '%Removed % old log entries%'") -gt $((SCALE - 10)) ]] && break; @@ -46,7 +46,7 @@ $CLICKHOUSE_CLIENT --receive_timeout 600 --query "SYSTEM SYNC REPLICA r2" # Need $CLICKHOUSE_CLIENT --query "SELECT value FROM system.zookeeper WHERE path = '/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/$SHARD/replicas/2$REPLICA' AND name = 'is_lost'"; -$CLICKHOUSE_CLIENT -n --query " +$CLICKHOUSE_CLIENT --query " DROP TABLE IF EXISTS r1; DROP TABLE IF EXISTS r2; " diff --git a/tests/queries/0_stateless/01415_sticking_mutations.sh b/tests/queries/0_stateless/01415_sticking_mutations.sh index b7c8768a65d..97467c3ce9d 100755 --- a/tests/queries/0_stateless/01415_sticking_mutations.sh +++ b/tests/queries/0_stateless/01415_sticking_mutations.sh @@ -11,7 +11,7 @@ $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS sticking_mutations" function check_sticky_mutations() { - $CLICKHOUSE_CLIENT -n --query "CREATE TABLE sticking_mutations ( + $CLICKHOUSE_CLIENT --query "CREATE TABLE sticking_mutations ( date Date, key UInt64, value1 String, diff --git a/tests/queries/0_stateless/01431_utf8_ubsan.reference b/tests/queries/0_stateless/01431_utf8_ubsan.reference index c98c950d535..dc785e57851 100644 --- a/tests/queries/0_stateless/01431_utf8_ubsan.reference +++ b/tests/queries/0_stateless/01431_utf8_ubsan.reference @@ -1,2 +1,2 @@ -FF -FF +EFBFBD +EFBFBD diff --git a/tests/queries/0_stateless/01431_utf8_ubsan.sql b/tests/queries/0_stateless/01431_utf8_ubsan.sql index d6a299225b1..3a28e023805 100644 --- a/tests/queries/0_stateless/01431_utf8_ubsan.sql +++ b/tests/queries/0_stateless/01431_utf8_ubsan.sql @@ -1,2 +1,5 @@ +-- Tags: no-fasttest +-- no-fasttest: upper/lowerUTF8 use ICU + SELECT hex(lowerUTF8('\xFF')); SELECT hex(upperUTF8('\xFF')); diff --git a/tests/queries/0_stateless/01451_dist_logs.sh b/tests/queries/0_stateless/01451_dist_logs.sh index 23dee7a827d..e281e232bb5 100755 --- a/tests/queries/0_stateless/01451_dist_logs.sh +++ b/tests/queries/0_stateless/01451_dist_logs.sh @@ -10,4 +10,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # triggered not for the first query for _ in {1..20}; do echo "select * from remote('127.{2,3}', system.numbers) where number = 10 limit 1;" -done | ${CLICKHOUSE_CLIENT} -n 2>/dev/null +done | ${CLICKHOUSE_CLIENT} 2>/dev/null diff --git a/tests/queries/0_stateless/01455_opentelemetry_distributed.sh b/tests/queries/0_stateless/01455_opentelemetry_distributed.sh index 2b6da6132ed..30940f93a56 100755 --- a/tests/queries/0_stateless/01455_opentelemetry_distributed.sh +++ b/tests/queries/0_stateless/01455_opentelemetry_distributed.sh @@ -9,7 +9,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) function check_log { -${CLICKHOUSE_CLIENT} --format=JSONEachRow -nq " +${CLICKHOUSE_CLIENT} --format=JSONEachRow -q " set enable_analyzer = 1; system flush logs; diff --git a/tests/queries/0_stateless/01458_named_tuple_millin.reference b/tests/queries/0_stateless/01458_named_tuple_millin.reference index 954dfe36563..86561570985 100644 --- a/tests/queries/0_stateless/01458_named_tuple_millin.reference +++ b/tests/queries/0_stateless/01458_named_tuple_millin.reference @@ -1,12 +1,16 @@ CREATE TABLE default.tuple ( - `j` Tuple(a Int8, b String) + `j` Tuple( + a Int8, + b String) ) ENGINE = Memory j Tuple(\n a Int8,\n b String) CREATE TABLE default.tuple ( - `j` Tuple(a Int8, b String) + `j` Tuple( + a Int8, + b String) ) ENGINE = Memory j Tuple(\n a Int8,\n b String) diff --git a/tests/queries/0_stateless/01459_manual_write_to_replicas.sh b/tests/queries/0_stateless/01459_manual_write_to_replicas.sh index 56620d848a3..cc574557438 100755 --- a/tests/queries/0_stateless/01459_manual_write_to_replicas.sh +++ b/tests/queries/0_stateless/01459_manual_write_to_replicas.sh @@ -10,7 +10,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) NUM_REPLICAS=10 for i in $(seq 1 $NUM_REPLICAS); do - $CLICKHOUSE_CLIENT -n -q " + $CLICKHOUSE_CLIENT -q " DROP TABLE IF EXISTS r$i SYNC; CREATE TABLE r$i (x UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/r', 'r$i') ORDER BY x; " @@ -31,7 +31,7 @@ done wait for i in $(seq 1 $NUM_REPLICAS); do - $CLICKHOUSE_CLIENT -n -q " + $CLICKHOUSE_CLIENT -q " SYSTEM SYNC REPLICA r$i; SELECT count(), min(x), max(x), sum(x) FROM r$i;" done diff --git a/tests/queries/0_stateless/01459_manual_write_to_replicas_quorum.sh b/tests/queries/0_stateless/01459_manual_write_to_replicas_quorum.sh index 91a73471557..24ea3ba3835 100755 --- a/tests/queries/0_stateless/01459_manual_write_to_replicas_quorum.sh +++ b/tests/queries/0_stateless/01459_manual_write_to_replicas_quorum.sh @@ -16,7 +16,7 @@ unset CLICKHOUSE_WRITE_COVERAGE NUM_REPLICAS=10 for i in $(seq 1 $NUM_REPLICAS); do - $CLICKHOUSE_CLIENT -n -q " + $CLICKHOUSE_CLIENT -q " DROP TABLE IF EXISTS r$i SYNC; CREATE TABLE r$i (x UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/r', 'r$i') ORDER BY x; " @@ -39,7 +39,7 @@ done wait for i in $(seq 1 $NUM_REPLICAS); do - $CLICKHOUSE_CLIENT -n -q " + $CLICKHOUSE_CLIENT -q " SYSTEM SYNC REPLICA r$i; SELECT count(), min(x), max(x), sum(x) FROM r$i;" done diff --git a/tests/queries/0_stateless/01459_manual_write_to_replicas_quorum_detach_attach.sh b/tests/queries/0_stateless/01459_manual_write_to_replicas_quorum_detach_attach.sh index 1f76a2efc6b..a2ef0d52328 100755 --- a/tests/queries/0_stateless/01459_manual_write_to_replicas_quorum_detach_attach.sh +++ b/tests/queries/0_stateless/01459_manual_write_to_replicas_quorum_detach_attach.sh @@ -11,7 +11,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) NUM_REPLICAS=6 for i in $(seq 1 $NUM_REPLICAS); do - $CLICKHOUSE_CLIENT -n -q " + $CLICKHOUSE_CLIENT -q " DROP TABLE IF EXISTS r$i SYNC; CREATE TABLE r$i (x UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/r', 'r$i') ORDER BY x; " @@ -36,7 +36,7 @@ done wait for i in $(seq 1 $NUM_REPLICAS); do - $CLICKHOUSE_CLIENT -n -q " + $CLICKHOUSE_CLIENT -q " SYSTEM SYNC REPLICA r$i; SELECT count(), min(x), max(x), sum(x) FROM r$i;" done diff --git a/tests/queries/0_stateless/01485_256_bit_multiply.sql b/tests/queries/0_stateless/01485_256_bit_multiply.sql index 5c8c47c9127..a4e99d51970 100644 --- a/tests/queries/0_stateless/01485_256_bit_multiply.sql +++ b/tests/queries/0_stateless/01485_256_bit_multiply.sql @@ -1,5 +1,7 @@ -- Tags: no-random-settings, no-asan, no-msan, no-tsan, no-ubsan, no-debug +SET max_rows_to_read = '100M'; + select count() from ( select toInt128(number) * number x, toInt256(number) * number y from numbers_mt(100000000) where x != y diff --git a/tests/queries/0_stateless/01504_compression_multiple_streams.reference b/tests/queries/0_stateless/01504_compression_multiple_streams.reference index 4d3aba66526..14cdce72044 100644 --- a/tests/queries/0_stateless/01504_compression_multiple_streams.reference +++ b/tests/queries/0_stateless/01504_compression_multiple_streams.reference @@ -1,20 +1,20 @@ 1 1 [[1]] (1,[1]) 1 1 [[1]] (1,[1]) -CREATE TABLE default.columns_with_multiple_streams\n(\n `field0` Nullable(Int64) CODEC(Delta(2), LZ4),\n `field1` Nullable(UInt8) CODEC(Delta(8), LZ4),\n `field2` Array(Array(Int64)) CODEC(Delta(8), LZ4),\n `field3` Tuple(UInt32, Array(UInt64)) CODEC(T64, Default)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, index_granularity = 8192 +CREATE TABLE default.columns_with_multiple_streams\n(\n `field0` Nullable(Int64) CODEC(Delta(2), LZ4),\n `field1` Nullable(UInt8) CODEC(Delta(8), LZ4),\n `field2` Array(Array(Int64)) CODEC(Delta(8), LZ4),\n `field3` Tuple(\n UInt32,\n Array(UInt64)) CODEC(T64, Default)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, index_granularity = 8192 1 1 [[1]] (1,[1]) 2 2 [[2]] (2,[2]) -CREATE TABLE default.columns_with_multiple_streams\n(\n `field0` Nullable(Int64) CODEC(Delta(2), LZ4),\n `field1` Nullable(UInt8) CODEC(Delta(8), LZ4),\n `field2` Array(Array(Int64)) CODEC(Delta(8), LZ4),\n `field3` Tuple(UInt32, Array(UInt64)) CODEC(Delta, Default)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, index_granularity = 8192 +CREATE TABLE default.columns_with_multiple_streams\n(\n `field0` Nullable(Int64) CODEC(Delta(2), LZ4),\n `field1` Nullable(UInt8) CODEC(Delta(8), LZ4),\n `field2` Array(Array(Int64)) CODEC(Delta(8), LZ4),\n `field3` Tuple(\n UInt32,\n Array(UInt64)) CODEC(Delta, Default)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, index_granularity = 8192 1 1 [[1]] (1,[1]) 2 2 [[2]] (2,[2]) 3 3 [[3]] (3,[3]) 1 1 [[1]] (1,[1]) 1 1 [[1]] (1,[1]) -CREATE TABLE default.columns_with_multiple_streams_compact\n(\n `field0` Nullable(Int64) CODEC(Delta(2), LZ4),\n `field1` Nullable(UInt8) CODEC(Delta(8), LZ4),\n `field2` Array(Array(Int64)) CODEC(Delta(8), LZ4),\n `field3` Tuple(UInt32, Array(UInt64)) CODEC(Delta, Default)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 100000, min_bytes_for_wide_part = 100000, index_granularity = 8192 +CREATE TABLE default.columns_with_multiple_streams_compact\n(\n `field0` Nullable(Int64) CODEC(Delta(2), LZ4),\n `field1` Nullable(UInt8) CODEC(Delta(8), LZ4),\n `field2` Array(Array(Int64)) CODEC(Delta(8), LZ4),\n `field3` Tuple(\n UInt32,\n Array(UInt64)) CODEC(Delta, Default)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 100000, min_bytes_for_wide_part = 100000, index_granularity = 8192 1 1 [[1]] (1,[1]) 2 2 [[2]] (2,[2]) 1 1 [[1]] (1,[1]) 2 2 [[2]] (2,[2]) -CREATE TABLE default.columns_with_multiple_streams_compact\n(\n `field0` Nullable(Int64) CODEC(Delta(2), LZ4),\n `field1` Nullable(UInt8) CODEC(Delta(8), LZ4),\n `field2` Array(Array(Int64)) CODEC(Delta(8), LZ4),\n `field3` Tuple(UInt32, Array(UInt64)) CODEC(Delta, Default)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 100000, min_bytes_for_wide_part = 100000, index_granularity = 8192 +CREATE TABLE default.columns_with_multiple_streams_compact\n(\n `field0` Nullable(Int64) CODEC(Delta(2), LZ4),\n `field1` Nullable(UInt8) CODEC(Delta(8), LZ4),\n `field2` Array(Array(Int64)) CODEC(Delta(8), LZ4),\n `field3` Tuple(\n UInt32,\n Array(UInt64)) CODEC(Delta, Default)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 100000, min_bytes_for_wide_part = 100000, index_granularity = 8192 1 1 [[1]] (1,[1]) 2 2 [[2]] (2,[2]) 3 3 [[3]] (3,[3]) diff --git a/tests/queries/0_stateless/01507_clickhouse_server_start_with_embedded_config.sh b/tests/queries/0_stateless/01507_clickhouse_server_start_with_embedded_config.sh index 29593ea4fb5..6954fef7314 100755 --- a/tests/queries/0_stateless/01507_clickhouse_server_start_with_embedded_config.sh +++ b/tests/queries/0_stateless/01507_clickhouse_server_start_with_embedded_config.sh @@ -34,7 +34,7 @@ done # Check access rights -$CLICKHOUSE_CLIENT -n --query " +$CLICKHOUSE_CLIENT --query " DROP DATABASE IF EXISTS test; CREATE DATABASE test; USE test; diff --git a/tests/queries/0_stateless/01508_format_regexp_raw.sh b/tests/queries/0_stateless/01508_format_regexp_raw.sh index 8cf1bd73566..52613c28b2f 100755 --- a/tests/queries/0_stateless/01508_format_regexp_raw.sh +++ b/tests/queries/0_stateless/01508_format_regexp_raw.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -n --query " +${CLICKHOUSE_CLIENT} --query " DROP TABLE IF EXISTS t; CREATE TABLE t (a String, b String) ENGINE = Memory; " @@ -12,7 +12,7 @@ CREATE TABLE t (a String, b String) ENGINE = Memory; ${CLICKHOUSE_CLIENT} --format_regexp_escaping_rule 'Raw' --format_regexp '^(.+?) separator (.+?)$' --query ' INSERT INTO t FORMAT Regexp abc\ separator Hello, world!' -${CLICKHOUSE_CLIENT} -n --query " +${CLICKHOUSE_CLIENT} --query " SELECT * FROM t; DROP TABLE t; " diff --git a/tests/queries/0_stateless/01509_dictionary_preallocate.sh b/tests/queries/0_stateless/01509_dictionary_preallocate.sh index 2a22a307a08..0459f69b0ad 100755 --- a/tests/queries/0_stateless/01509_dictionary_preallocate.sh +++ b/tests/queries/0_stateless/01509_dictionary_preallocate.sh @@ -15,7 +15,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # PREALLOCATE attribute (and also for the history/greppability, that it was # such). -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " DROP TABLE IF EXISTS data_01509; DROP DICTIONARY IF EXISTS dict_01509; CREATE TABLE data_01509 diff --git a/tests/queries/0_stateless/01509_format_raw_blob.reference b/tests/queries/0_stateless/01509_format_raw_blob.reference index 05014001bd9..eb074457e07 100644 --- a/tests/queries/0_stateless/01509_format_raw_blob.reference +++ b/tests/queries/0_stateless/01509_format_raw_blob.reference @@ -1,2 +1,2 @@ -9fd46251e5574c633cbfbb9293671888 - -9fd46251e5574c633cbfbb9293671888 - +48fad37bc89fc3bcc29c4750897b6709 - +48fad37bc89fc3bcc29c4750897b6709 - diff --git a/tests/queries/0_stateless/01509_format_raw_blob.sh b/tests/queries/0_stateless/01509_format_raw_blob.sh index 3d1d3fbb17b..355928014e8 100755 --- a/tests/queries/0_stateless/01509_format_raw_blob.sh +++ b/tests/queries/0_stateless/01509_format_raw_blob.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -n --query " +${CLICKHOUSE_CLIENT} --query " DROP TABLE IF EXISTS t; CREATE TABLE t (a LowCardinality(Nullable(String))) ENGINE = Memory; " @@ -12,7 +12,7 @@ CREATE TABLE t (a LowCardinality(Nullable(String))) ENGINE = Memory; ${CLICKHOUSE_CLIENT} --query "INSERT INTO t FORMAT RawBLOB" < ${BASH_SOURCE[0]} cat ${BASH_SOURCE[0]} | md5sum -${CLICKHOUSE_CLIENT} -n --query "SELECT * FROM t FORMAT RawBLOB" | md5sum +${CLICKHOUSE_CLIENT} --query "SELECT * FROM t FORMAT RawBLOB" | md5sum ${CLICKHOUSE_CLIENT} --query " DROP TABLE t; diff --git a/tests/queries/0_stateless/01510_format_regexp_raw_low_cardinality.sh b/tests/queries/0_stateless/01510_format_regexp_raw_low_cardinality.sh index 594caca7d04..dc178d081bf 100755 --- a/tests/queries/0_stateless/01510_format_regexp_raw_low_cardinality.sh +++ b/tests/queries/0_stateless/01510_format_regexp_raw_low_cardinality.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -n --query " +${CLICKHOUSE_CLIENT} --query " DROP TABLE IF EXISTS t; CREATE TABLE t (a String, b LowCardinality(Nullable(String))) ENGINE = Memory; " @@ -12,7 +12,7 @@ CREATE TABLE t (a String, b LowCardinality(Nullable(String))) ENGINE = Memory; ${CLICKHOUSE_CLIENT} --format_regexp_escaping_rule 'Raw' --format_regexp '^(.+?) separator (.+?)$' --query ' INSERT INTO t FORMAT Regexp abc\ separator Hello, world!' -${CLICKHOUSE_CLIENT} -n --query " +${CLICKHOUSE_CLIENT} --query " SELECT * FROM t; DROP TABLE t; " diff --git a/tests/queries/0_stateless/01526_initial_query_id.sh b/tests/queries/0_stateless/01526_initial_query_id.sh index e77764ee34e..8ba27a04d60 100755 --- a/tests/queries/0_stateless/01526_initial_query_id.sh +++ b/tests/queries/0_stateless/01526_initial_query_id.sh @@ -15,7 +15,7 @@ ${CLICKHOUSE_CURL} \ --get \ --data-urlencode "query=select 1 format Null" -${CLICKHOUSE_CLIENT} -n -q " +${CLICKHOUSE_CLIENT} -q " system flush logs; select interface, initial_query_id = query_id from system.query_log diff --git a/tests/queries/0_stateless/01548_create_table_compound_column_format.reference b/tests/queries/0_stateless/01548_create_table_compound_column_format.reference index c6c4dcdfa4a..c23cc57548b 100644 --- a/tests/queries/0_stateless/01548_create_table_compound_column_format.reference +++ b/tests/queries/0_stateless/01548_create_table_compound_column_format.reference @@ -1,12 +1,13 @@ CREATE TABLE test ( `a` Int64, - `b` NESTED(a Int64) + `b` Nested(a Int64) ) ENGINE = TinyLog CREATE TABLE test ( `a` Int64, - `b` TUPLE(a Int64) + `b` Tuple( + a Int64) ) ENGINE = TinyLog diff --git a/tests/queries/0_stateless/01548_create_table_compound_column_format.sh b/tests/queries/0_stateless/01548_create_table_compound_column_format.sh index 99e3aed2825..9065af17dc1 100755 --- a/tests/queries/0_stateless/01548_create_table_compound_column_format.sh +++ b/tests/queries/0_stateless/01548_create_table_compound_column_format.sh @@ -4,6 +4,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -echo "CREATE TABLE test(a Int64, b NESTED(a Int64)) ENGINE=TinyLog" | $CLICKHOUSE_FORMAT +echo "CREATE TABLE test(a Int64, b Nested(a Int64)) ENGINE=TinyLog" | $CLICKHOUSE_FORMAT -echo "CREATE TABLE test(a Int64, b TUPLE(a Int64)) ENGINE=TinyLog" | $CLICKHOUSE_FORMAT \ No newline at end of file +echo "CREATE TABLE test(a Int64, b Tuple(a Int64)) ENGINE=TinyLog" | $CLICKHOUSE_FORMAT \ No newline at end of file diff --git a/tests/queries/0_stateless/01561_mann_whitney_scipy.python b/tests/queries/0_stateless/01561_mann_whitney_scipy.python index 4713120287d..0f84d510933 100644 --- a/tests/queries/0_stateless/01561_mann_whitney_scipy.python +++ b/tests/queries/0_stateless/01561_mann_whitney_scipy.python @@ -19,7 +19,13 @@ def test_and_check(name, a, b, t_stat, p_value): ) client.query( "INSERT INTO mann_whitney VALUES {};".format( - ", ".join(["({},{}), ({},{})".format(i, 0, j, 1) for i, j in zip(a, b)]) + ", ".join(["({},{})".format(i, 0) for i in a]) + ) + ) + + client.query( + "INSERT INTO mann_whitney VALUES {};".format( + ", ".join(["({},{})".format(i, 1) for i in b]) ) ) @@ -59,6 +65,15 @@ def test_mann_whitney(): test_and_check("mannWhitneyUTest('greater')", rvs1, rvs2, s, p) +def test_mann_whitney_skew(): + rvs1 = [1] + rvs2 = [0, 2, 4] + s, p = stats.mannwhitneyu(rvs1, rvs2, alternative="two-sided") + test_and_check("mannWhitneyUTest", rvs1, rvs2, s, p) + test_and_check("mannWhitneyUTest('two-sided')", rvs1, rvs2, s, p) + + if __name__ == "__main__": test_mann_whitney() + test_mann_whitney_skew() print("Ok.") diff --git a/tests/queries/0_stateless/01565_query_loop_after_client_error.expect b/tests/queries/0_stateless/01565_query_loop_after_client_error.expect index 6253840c63c..f08ef911da4 100755 --- a/tests/queries/0_stateless/01565_query_loop_after_client_error.expect +++ b/tests/queries/0_stateless/01565_query_loop_after_client_error.expect @@ -24,7 +24,7 @@ expect_after { -i $any_spawn_id timeout { exit 1 } } -spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion -mn --history_file=$history_file --highlight 0" +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion -m --history_file=$history_file --highlight 0" expect "\n:) " send -- "DROP TABLE IF EXISTS t01565;\r" diff --git a/tests/queries/0_stateless/01590_countSubstrings.sql b/tests/queries/0_stateless/01590_countSubstrings.sql index b38cbb7d188..5ec4f412d7f 100644 --- a/tests/queries/0_stateless/01590_countSubstrings.sql +++ b/tests/queries/0_stateless/01590_countSubstrings.sql @@ -1,3 +1,6 @@ +-- Tags: no-fasttest +-- no-fasttest: upper/lowerUTF8 use ICU + -- -- countSubstrings -- diff --git a/tests/queries/0_stateless/01599_mutation_query_params.sh b/tests/queries/0_stateless/01599_mutation_query_params.sh index 52b0131a9c2..5b604c96028 100755 --- a/tests/queries/0_stateless/01599_mutation_query_params.sh +++ b/tests/queries/0_stateless/01599_mutation_query_params.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -n --query " +$CLICKHOUSE_CLIENT --query " DROP TABLE IF EXISTS test; CREATE TABLE test diff --git a/tests/queries/0_stateless/01600_parts_states_metrics_long.sh b/tests/queries/0_stateless/01600_parts_states_metrics_long.sh index 47b5a4dea13..0a9f94cc451 100755 --- a/tests/queries/0_stateless/01600_parts_states_metrics_long.sh +++ b/tests/queries/0_stateless/01600_parts_states_metrics_long.sh @@ -1,50 +1,63 @@ #!/usr/bin/env bash -# Tags: long +# Tags: long, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -# NOTE: database = $CLICKHOUSE_DATABASE is unwanted -verify_sql="SELECT - (SELECT sumIf(value, metric = 'PartsActive'), sumIf(value, metric = 'PartsOutdated') FROM system.metrics) - = (SELECT sum(active), sum(NOT active) FROM - (SELECT active FROM system.parts UNION ALL SELECT active FROM system.projection_parts UNION ALL SELECT 1 FROM system.dropped_tables_parts))" +function query() +{ + # NOTE: database_atomic_wait_for_drop_and_detach_synchronously needed only for local env, CI has it ON + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&database_atomic_wait_for_drop_and_detach_synchronously=1" -d "$*" +} + # The query is not atomic - it can compare states between system.parts and system.metrics from different points in time. # So, there is inherent race condition. But it should get expected result eventually. # In case of test failure, this code will do infinite loop and timeout. verify() { - for i in {1..5000} - do - result=$( $CLICKHOUSE_CLIENT --query="$verify_sql" ) - [ "$result" = "1" ] && echo "$result" && break - sleep 0.1 + local result - if [[ $i -eq 5000 ]] - then - $CLICKHOUSE_CLIENT " - SELECT sumIf(value, metric = 'PartsActive'), sumIf(value, metric = 'PartsOutdated') FROM system.metrics; - SELECT sum(active), sum(NOT active) FROM system.parts; - SELECT sum(active), sum(NOT active) FROM system.projection_parts; - SELECT count() FROM system.dropped_tables_parts; - " + for _ in {1..100}; do + # NOTE: database = $CLICKHOUSE_DATABASE is unwanted + result=$( query "SELECT + (SELECT sumIf(value, metric = 'PartsActive'), sumIf(value, metric = 'PartsOutdated') FROM system.metrics) + = + (SELECT sum(active), sum(NOT active) FROM ( + SELECT active FROM system.parts + UNION ALL SELECT active FROM system.projection_parts + UNION ALL SELECT 1 FROM system.dropped_tables_parts + ))" + ) + + if [ "$result" = "1" ]; then + echo "$result" + return fi + + sleep 0.5 done + + $CLICKHOUSE_CLIENT -q " + SELECT sumIf(value, metric = 'PartsActive'), sumIf(value, metric = 'PartsOutdated') FROM system.metrics; + SELECT sum(active), sum(NOT active) FROM system.parts; + SELECT sum(active), sum(NOT active) FROM system.projection_parts; + SELECT count() FROM system.dropped_tables_parts; + " } -$CLICKHOUSE_CLIENT --database_atomic_wait_for_drop_and_detach_synchronously=1 --query="DROP TABLE IF EXISTS test_table" -$CLICKHOUSE_CLIENT --query="CREATE TABLE test_table (data Date) ENGINE = MergeTree PARTITION BY toYear(data) ORDER BY data;" +query "DROP TABLE IF EXISTS test_table" +query "CREATE TABLE test_table (data Date) ENGINE = MergeTree PARTITION BY toYear(data) ORDER BY data;" -$CLICKHOUSE_CLIENT --query="INSERT INTO test_table VALUES ('1992-01-01')" +query "INSERT INTO test_table VALUES ('1992-01-01')" verify -$CLICKHOUSE_CLIENT --query="INSERT INTO test_table VALUES ('1992-01-02')" +query "INSERT INTO test_table VALUES ('1992-01-02')" verify -$CLICKHOUSE_CLIENT --query="OPTIMIZE TABLE test_table FINAL" +query "OPTIMIZE TABLE test_table FINAL" verify -$CLICKHOUSE_CLIENT --database_atomic_wait_for_drop_and_detach_synchronously=1 --query="DROP TABLE test_table" +query "DROP TABLE test_table" verify diff --git a/tests/queries/0_stateless/01600_quota_by_forwarded_ip.sh b/tests/queries/0_stateless/01600_quota_by_forwarded_ip.sh index 1d768c8b027..834eba8f25c 100755 --- a/tests/queries/0_stateless/01600_quota_by_forwarded_ip.sh +++ b/tests/queries/0_stateless/01600_quota_by_forwarded_ip.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -n --query " +$CLICKHOUSE_CLIENT --query " CREATE USER quoted_by_ip_${CLICKHOUSE_DATABASE}; CREATE USER quoted_by_forwarded_ip_${CLICKHOUSE_DATABASE}; @@ -57,7 +57,7 @@ ${CLICKHOUSE_CURL} -H 'X-Forwarded-For: 5.6.7.8, 1.2.3.4' -sS "${CLICKHOUSE_URL} ${CLICKHOUSE_CURL} -H 'X-Forwarded-For: 1.2.3.4, 5.6.7.8' -sS "${CLICKHOUSE_URL}&user=quoted_by_forwarded_ip_${CLICKHOUSE_DATABASE}" -d "SELECT count() FROM numbers(10)" -$CLICKHOUSE_CLIENT -n --query " +$CLICKHOUSE_CLIENT --query " DROP QUOTA IF EXISTS quota_by_ip_${CLICKHOUSE_DATABASE}; DROP QUOTA IF EXISTS quota_by_forwarded_ip; diff --git a/tests/queries/0_stateless/01603_read_with_backoff_bug.sql b/tests/queries/0_stateless/01603_read_with_backoff_bug.sql index 8b97f3514b3..8a6fa9b7845 100644 --- a/tests/queries/0_stateless/01603_read_with_backoff_bug.sql +++ b/tests/queries/0_stateless/01603_read_with_backoff_bug.sql @@ -1,15 +1,17 @@ --- Tags: long, no-tsan, no-distributed-cache --- Tag no-tsan: Too long for TSan +-- Tags: long, no-tsan, no-msan, no-distributed-cache +-- Too long for TSan and MSan set enable_filesystem_cache=0; set enable_filesystem_cache_on_write_operations=0; +set max_rows_to_read = '30M'; + drop table if exists t; create table t (x UInt64, s String) engine = MergeTree order by x SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO t SELECT number, if(number < (8129 * 1024), arrayStringConcat(arrayMap(x -> toString(x), range(number % 128)), ' '), '') -FROM numbers_mt((8129 * 1024) * 3) settings max_insert_threads=8; +FROM numbers_mt((8129 * 1024) * 3) settings max_insert_threads=8, max_rows_to_read=0; -- optimize table t final; diff --git a/tests/queries/0_stateless/01656_test_query_log_factories_info.reference b/tests/queries/0_stateless/01656_test_query_log_factories_info.reference index 47b3133ceca..44531c19ab7 100644 --- a/tests/queries/0_stateless/01656_test_query_log_factories_info.reference +++ b/tests/queries/0_stateless/01656_test_query_log_factories_info.reference @@ -17,7 +17,7 @@ used_functions ['repeat'] arraySort(used_data_type_families) -['Array','Int32','Nullable','String'] +['Int32','Nullable','String'] used_database_engines ['Atomic'] diff --git a/tests/queries/0_stateless/01684_ssd_cache_dictionary_simple_key.sh b/tests/queries/0_stateless/01684_ssd_cache_dictionary_simple_key.sh index 0e5c2862066..6a7eb975c87 100755 --- a/tests/queries/0_stateless/01684_ssd_cache_dictionary_simple_key.sh +++ b/tests/queries/0_stateless/01684_ssd_cache_dictionary_simple_key.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" DROP DATABASE IF EXISTS 01684_database_for_cache_dictionary; CREATE DATABASE 01684_database_for_cache_dictionary; diff --git a/tests/queries/0_stateless/01685_ssd_cache_dictionary_complex_key.sh b/tests/queries/0_stateless/01685_ssd_cache_dictionary_complex_key.sh index 55061b9a643..c2d222a86ea 100755 --- a/tests/queries/0_stateless/01685_ssd_cache_dictionary_complex_key.sh +++ b/tests/queries/0_stateless/01685_ssd_cache_dictionary_complex_key.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" CREATE TABLE complex_key_simple_attributes_source_table ( id UInt64, diff --git a/tests/queries/0_stateless/01691_parser_data_type_exponential.sh b/tests/queries/0_stateless/01691_parser_data_type_exponential.sh index f8004f9350d..5d115e09a79 100755 --- a/tests/queries/0_stateless/01691_parser_data_type_exponential.sh +++ b/tests/queries/0_stateless/01691_parser_data_type_exponential.sh @@ -5,4 +5,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh # Check that DataType parser does not have exponential complexity in the case found by fuzzer. -for _ in {1..10}; do ${CLICKHOUSE_CLIENT} -n --query "SELECT CAST(1 AS A2222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateFuncpion(groupBitmapp, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222223ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateFuncpion(groupBitmap, 00000000000000000000000000000000000000000000000000000000000000000000000000000001841416382, 222222222222222ggregateFuncpion(groupBitmap22222222222222222222222222222222222222222222222222220000000000000000000000000000000000000000000000000000000000000000000000000000002260637443813394204 222222222222222222ggregateFuncpion(groupBitmapp, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222223ggregateFuncpio22222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222222ggregateFuncpion(groupBitmap2222222222222ggregateFuncpion(groupBitmapp, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222223ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggre222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 22222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 2222222222222eFuncpion(groupBitmap, 00000000000000000000000000000000000000000000000000000000000000000000000000000001841416382, 222222222222222ggregateFuncpion(groupBitmap22222222222222222222222222222222222222222222222222222222222222222222222200000000000000000000178859639454016722222222222222222222222222222222222222222 222222222222222222ggregateFuncpion(groupBitmapp, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222223ggregateFuncpio22222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateFuncpion(groupBitmapp, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222223ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateFuncpion(groupBitmap, 00000000000000000000000000000000000000000000000000000000000000000000000000000001841416382, 222222222222222ggregateFuncpion(groupBitmap22222222222222222222222222222222222222222222222222222222222222222222222200000000000000000000178859639454016722222222222222222222222222222222222222222 222222222222222222ggregateFuncpion(groupBitmapp, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222223ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateFuncpion(groupBitmap, 22222222222222222222222222222222222222222222222222222222222222222222222222222222222222222, 222222222222222ggregateFuncpion(groupBitmap222222222222222222222222222222222222222222222222222222222222222222222222000000000000000000001788596394540167623 222222222222222222ggregateFu22222222222222222222222222 222222222, UInt33)); -- { clientError 62 }"; done +for _ in {1..10}; do ${CLICKHOUSE_CLIENT} --query "SELECT CAST(1 AS A2222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateFuncpion(groupBitmapp, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222223ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateFuncpion(groupBitmap, 00000000000000000000000000000000000000000000000000000000000000000000000000000001841416382, 222222222222222ggregateFuncpion(groupBitmap22222222222222222222222222222222222222222222222222220000000000000000000000000000000000000000000000000000000000000000000000000000002260637443813394204 222222222222222222ggregateFuncpion(groupBitmapp, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222223ggregateFuncpio22222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222222ggregateFuncpion(groupBitmap2222222222222ggregateFuncpion(groupBitmapp, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222223ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggre222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 22222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 2222222222222eFuncpion(groupBitmap, 00000000000000000000000000000000000000000000000000000000000000000000000000000001841416382, 222222222222222ggregateFuncpion(groupBitmap22222222222222222222222222222222222222222222222222222222222222222222222200000000000000000000178859639454016722222222222222222222222222222222222222222 222222222222222222ggregateFuncpion(groupBitmapp, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222223ggregateFuncpio22222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateFuncpion(groupBitmapp, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222223ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateFuncpion(groupBitmap, 00000000000000000000000000000000000000000000000000000000000000000000000000000001841416382, 222222222222222ggregateFuncpion(groupBitmap22222222222222222222222222222222222222222222222222222222222222222222222200000000000000000000178859639454016722222222222222222222222222222222222222222 222222222222222222ggregateFuncpion(groupBitmapp, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222223ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateFuncpion(groupBitmap, 22222222222222222222222222222222222222222222222222222222222222222222222222222222222222222, 222222222222222ggregateFuncpion(groupBitmap222222222222222222222222222222222222222222222222222222222222222222222222000000000000000000001788596394540167623 222222222222222222ggregateFu22222222222222222222222222 222222222, UInt33)); -- { clientError 62 }"; done diff --git a/tests/queries/0_stateless/01710_aggregate_projection_with_normalized_states.reference b/tests/queries/0_stateless/01710_aggregate_projection_with_normalized_states.reference index 25aa9dc5dec..37993873983 100644 --- a/tests/queries/0_stateless/01710_aggregate_projection_with_normalized_states.reference +++ b/tests/queries/0_stateless/01710_aggregate_projection_with_normalized_states.reference @@ -1,2 +1,3 @@ 3 950 990 500 2000 +[950] [999] diff --git a/tests/queries/0_stateless/01710_aggregate_projection_with_normalized_states.sql b/tests/queries/0_stateless/01710_aggregate_projection_with_normalized_states.sql index 5375823aa8e..956bf3711a2 100644 --- a/tests/queries/0_stateless/01710_aggregate_projection_with_normalized_states.sql +++ b/tests/queries/0_stateless/01710_aggregate_projection_with_normalized_states.sql @@ -29,4 +29,6 @@ FROM cluster('test_cluster_two_shards', currentDatabase(), r) WHERE a = 'x' settings prefer_localhost_replica=0; +SELECT quantilesTimingMerge(0.95)(q), quantilesTimingMerge(toInt64(1))(q) FROM remote('127.0.0.{1,2}', currentDatabase(), r); + DROP TABLE r; diff --git a/tests/queries/0_stateless/01710_projection_vertical_merges.sql b/tests/queries/0_stateless/01710_projection_vertical_merges.sql index 0f80d659e92..0d745e44b10 100644 --- a/tests/queries/0_stateless/01710_projection_vertical_merges.sql +++ b/tests/queries/0_stateless/01710_projection_vertical_merges.sql @@ -1,4 +1,5 @@ --- Tags: long +-- Tags: long, no-parallel +-- set no-parallel tag is to prevent timeout of this test drop table if exists t; diff --git a/tests/queries/0_stateless/01710_projections_optimize_aggregation_in_order.sh b/tests/queries/0_stateless/01710_projections_optimize_aggregation_in_order.sh index a166837e01a..f38e53f898a 100755 --- a/tests/queries/0_stateless/01710_projections_optimize_aggregation_in_order.sh +++ b/tests/queries/0_stateless/01710_projections_optimize_aggregation_in_order.sh @@ -4,7 +4,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " DROP TABLE IF EXISTS in_order_agg_01710; CREATE TABLE in_order_agg_01710 diff --git a/tests/queries/0_stateless/01710_projections_partial_optimize_aggregation_in_order.sh b/tests/queries/0_stateless/01710_projections_partial_optimize_aggregation_in_order.sh index ee73974e8a4..01537524730 100755 --- a/tests/queries/0_stateless/01710_projections_partial_optimize_aggregation_in_order.sh +++ b/tests/queries/0_stateless/01710_projections_partial_optimize_aggregation_in_order.sh @@ -9,7 +9,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " DROP TABLE IF EXISTS in_order_agg_partial_01710; CREATE TABLE in_order_agg_partial_01710 diff --git a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql index 6625ad916e8..83a26c83005 100644 --- a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql +++ b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql @@ -1,6 +1,7 @@ -- Tags: long, distributed, no-random-settings drop table if exists data_01730; +SET max_rows_to_read = 0, max_result_rows = 0, max_bytes_before_external_group_by = 0; -- does not use 127.1 due to prefer_localhost_replica diff --git a/tests/queries/0_stateless/01753_optimize_aggregation_in_order.sh b/tests/queries/0_stateless/01753_optimize_aggregation_in_order.sh index 2a7345f4865..f9681ebe4f5 100755 --- a/tests/queries/0_stateless/01753_optimize_aggregation_in_order.sh +++ b/tests/queries/0_stateless/01753_optimize_aggregation_in_order.sh @@ -6,7 +6,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_CLIENT --optimize_aggregation_in_order=1 -nm -q " +$CLICKHOUSE_CLIENT --optimize_aggregation_in_order=1 -m -q " drop table if exists data_01753; create table data_01753 (key Int) engine=MergeTree() order by key as select * from numbers(8); select * from data_01753 group by key settings max_block_size=1; diff --git a/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.reference b/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.reference index 74a0356b11e..fc1ddcae595 100644 --- a/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.reference +++ b/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.reference @@ -49,7 +49,7 @@ order by query; tuple(2) select 'optimize_skip_unused_shards_rewrite_in(2,)'; optimize_skip_unused_shards_rewrite_in(2,) -with (select currentDatabase()) as id_2 select *, ignore(id_2) from dist_01756 where dummy in (2,); +with (select currentDatabase()) as id_2 select *, ignore(id_2) from dist_01756 where dummy in (2); system flush logs; select splitByString('IN', query)[-1] from system.query_log where event_date >= yesterday() and @@ -59,10 +59,10 @@ select splitByString('IN', query)[-1] from system.query_log where query like concat('%', currentDatabase(), '%AS%id_2%') and type = 'QueryFinish' order by query; - tuple(2) + (2) select 'optimize_skip_unused_shards_rewrite_in(0,)'; optimize_skip_unused_shards_rewrite_in(0,) -with (select currentDatabase()) as id_00 select *, ignore(id_00) from dist_01756 where dummy in (0,); +with (select currentDatabase()) as id_00 select *, ignore(id_00) from dist_01756 where dummy in (0); 0 0 system flush logs; select splitByString('IN', query)[-1] from system.query_log where @@ -73,7 +73,7 @@ select splitByString('IN', query)[-1] from system.query_log where query like concat('%', currentDatabase(), '%AS%id_00%') and type = 'QueryFinish' order by query; - tuple(0) + (0) -- signed column select 'signed column'; signed column diff --git a/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.sql b/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.sql index bcbedeb3ada..0759fb93a44 100644 --- a/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.sql +++ b/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.sql @@ -63,7 +63,7 @@ select splitByString('IN', query)[-1] from system.query_log where order by query; select 'optimize_skip_unused_shards_rewrite_in(2,)'; -with (select currentDatabase()) as id_2 select *, ignore(id_2) from dist_01756 where dummy in (2,); +with (select currentDatabase()) as id_2 select *, ignore(id_2) from dist_01756 where dummy in (2); system flush logs; select splitByString('IN', query)[-1] from system.query_log where event_date >= yesterday() and @@ -75,7 +75,7 @@ select splitByString('IN', query)[-1] from system.query_log where order by query; select 'optimize_skip_unused_shards_rewrite_in(0,)'; -with (select currentDatabase()) as id_00 select *, ignore(id_00) from dist_01756 where dummy in (0,); +with (select currentDatabase()) as id_00 select *, ignore(id_00) from dist_01756 where dummy in (0); system flush logs; select splitByString('IN', query)[-1] from system.query_log where event_date >= yesterday() and diff --git a/tests/queries/0_stateless/01758_optimize_skip_unused_shards_once.sh b/tests/queries/0_stateless/01758_optimize_skip_unused_shards_once.sh index b963f3a618f..3c9e12f780b 100755 --- a/tests/queries/0_stateless/01758_optimize_skip_unused_shards_once.sh +++ b/tests/queries/0_stateless/01758_optimize_skip_unused_shards_once.sh @@ -7,7 +7,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_CLIENT --optimize_skip_unused_shards=1 -nm -q " +$CLICKHOUSE_CLIENT --optimize_skip_unused_shards=1 -m -q " create table dist_01758 as system.one engine=Distributed(test_cluster_two_shards, system, one, dummy); select * from dist_01758 where dummy = 0 format Null; " |& grep -o "StorageDistributed (dist_01758).*" diff --git a/tests/queries/0_stateless/01791_dist_INSERT_block_structure_mismatch.sh b/tests/queries/0_stateless/01791_dist_INSERT_block_structure_mismatch.sh index 9c51b82282c..ee46f8194b9 100755 --- a/tests/queries/0_stateless/01791_dist_INSERT_block_structure_mismatch.sh +++ b/tests/queries/0_stateless/01791_dist_INSERT_block_structure_mismatch.sh @@ -7,7 +7,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_CLIENT --prefer_localhost_replica=0 -nm -q " +$CLICKHOUSE_CLIENT --prefer_localhost_replica=0 -m -q " DROP TABLE IF EXISTS tmp_01683; DROP TABLE IF EXISTS dist_01683; diff --git a/tests/queries/0_stateless/01811_storage_buffer_flush_parameters.sh b/tests/queries/0_stateless/01811_storage_buffer_flush_parameters.sh index 6a5949741ab..7878867e159 100755 --- a/tests/queries/0_stateless/01811_storage_buffer_flush_parameters.sh +++ b/tests/queries/0_stateless/01811_storage_buffer_flush_parameters.sh @@ -17,7 +17,7 @@ function wait_with_limit() done } -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " drop table if exists data_01811; drop table if exists buffer_01811; @@ -39,9 +39,9 @@ $CLICKHOUSE_CLIENT -nm -q " # wait for background buffer flush wait_with_limit 30 '[[ $($CLICKHOUSE_CLIENT -q "select count() from data_01811") -gt 0 ]]' -$CLICKHOUSE_CLIENT -nm -q "select count() from data_01811" +$CLICKHOUSE_CLIENT -m -q "select count() from data_01811" -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " drop table buffer_01811; drop table data_01811; " diff --git a/tests/queries/0_stateless/01814_distributed_push_down_limit.sh b/tests/queries/0_stateless/01814_distributed_push_down_limit.sh index 4b75102e9cf..f3e8ceffff6 100755 --- a/tests/queries/0_stateless/01814_distributed_push_down_limit.sh +++ b/tests/queries/0_stateless/01814_distributed_push_down_limit.sh @@ -13,7 +13,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) function setup() { - $CLICKHOUSE_CLIENT -nm -q " + $CLICKHOUSE_CLIENT -m -q " drop table if exists data_01814; drop table if exists dist_01814; @@ -24,7 +24,7 @@ function setup() function cleanup() { - $CLICKHOUSE_CLIENT -nm -q " + $CLICKHOUSE_CLIENT -m -q " drop table data_01814; drop table dist_01814; " @@ -67,7 +67,7 @@ function test_distributed_push_down_limit_with_query_log() $CLICKHOUSE_CLIENT "${settings_and_opts[@]}" -q "select * from $table group by key limit $offset, 10" - $CLICKHOUSE_CLIENT -nm -q " + $CLICKHOUSE_CLIENT -m -q " system flush logs; select read_rows from system.query_log where diff --git a/tests/queries/0_stateless/01825_new_type_json_10.reference b/tests/queries/0_stateless/01825_new_type_json_10.reference new file mode 100644 index 00000000000..d70c8210914 --- /dev/null +++ b/tests/queries/0_stateless/01825_new_type_json_10.reference @@ -0,0 +1,13 @@ +('a.b','Int64') +('a.c','Array(JSON(max_dynamic_types=16, max_dynamic_paths=256))') +('d','Int64') +('e','Array(Nullable(Int64))') +('f','Int64') +{"o":{"a":{"b":"1","c":[{"d":"10","e":["31"]},{"d":"20","e":["63","127"]}]}}} +{"o":{"a":{"b":"2","c":[]}}} +{"o":{"a":{"b":"3","c":[{"e":["32"],"f":"20"},{"e":["64","128"],"f":"30"}]}}} +{"o":{"a":{"b":"4","c":[]}}} +1 [10,20] [[31],[63,127]] [NULL,NULL] +2 [] [] [] +3 [NULL,NULL] [[32],[64,128]] [20,30] +4 [] [] [] diff --git a/tests/queries/0_stateless/01825_new_type_json_10.sql b/tests/queries/0_stateless/01825_new_type_json_10.sql new file mode 100644 index 00000000000..f586cc4477b --- /dev/null +++ b/tests/queries/0_stateless/01825_new_type_json_10.sql @@ -0,0 +1,17 @@ +-- Tags: no-fasttest + +SET allow_experimental_json_type = 1; + +DROP TABLE IF EXISTS t_json_10; +CREATE TABLE t_json_10 (o JSON) ENGINE = Memory; + +INSERT INTO t_json_10 FORMAT JSONAsObject {"a": {"b": 1, "c": [{"d": 10, "e": [31]}, {"d": 20, "e": [63, 127]}]}} {"a": {"b": 2, "c": []}} + +INSERT INTO t_json_10 FORMAT JSONAsObject {"a": {"b": 3, "c": [{"f": 20, "e": [32]}, {"f": 30, "e": [64, 128]}]}} {"a": {"b": 4, "c": []}} + +SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(o)) as path FROM t_json_10 order by path; +SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(arrayJoin(o.a.c.:`Array(JSON)`))) as path FROM t_json_10 order by path; +SELECT o FROM t_json_10 ORDER BY o.a.b FORMAT JSONEachRow; +SELECT o.a.b, o.a.c.:`Array(JSON)`.d, o.a.c.:`Array(JSON)`.e, o.a.c.:`Array(JSON)`.f FROM t_json_10 ORDER BY o.a.b; + +DROP TABLE t_json_10; diff --git a/tests/queries/0_stateless/01825_new_type_json_11.reference b/tests/queries/0_stateless/01825_new_type_json_11.reference new file mode 100644 index 00000000000..aa3375a23cb --- /dev/null +++ b/tests/queries/0_stateless/01825_new_type_json_11.reference @@ -0,0 +1,13 @@ +('id','Int64') +('key_1','Array(JSON(max_dynamic_types=16, max_dynamic_paths=256))') +('key_2','Int64') +('key_3','Array(JSON(max_dynamic_types=8, max_dynamic_paths=64))') +('key_4','Array(JSON(max_dynamic_types=4, max_dynamic_paths=16))') +('key_7','Int64') +('key_5','Int64') +{"obj":{"id":"1","key_1":[{"key_2":"100","key_3":[{"key_4":[{"key_5":"-2"}],"key_7":"257"}]},{"key_2":"65536"}]}} +{"obj":{"id":"2","key_1":[{"key_2":"101","key_3":[{"key_4":[{"key_5":"-2"}]}]},{"key_2":"102","key_3":[{"key_7":"257"}]},{"key_2":"65536"}]}} +{"obj.key_1.:`Array(JSON)`.key_3":[[{"key_4":[{"key_5":"-2"}],"key_7":"257"}],null]} +{"obj.key_1.:`Array(JSON)`.key_3":[[{"key_4":[{"key_5":"-2"}]}],[{"key_7":"257"}],null]} +[[[-2]],[]] [[257],[]] +[[[-2]],[[]],[]] [[NULL],[257],[]] diff --git a/tests/queries/0_stateless/01825_new_type_json_11.sh b/tests/queries/0_stateless/01825_new_type_json_11.sh new file mode 100755 index 00000000000..f448b7433ab --- /dev/null +++ b/tests/queries/0_stateless/01825_new_type_json_11.sh @@ -0,0 +1,64 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t_json_11" + +$CLICKHOUSE_CLIENT -q "CREATE TABLE t_json_11 (obj JSON) ENGINE = MergeTree ORDER BY tuple()" --allow_experimental_json_type 1 + +cat < notEmpty(x), outpoints)" + +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS btc" + +rm ${CLICKHOUSE_USER_FILES_UNIQUE}/btc_transactions.json diff --git a/tests/queries/0_stateless/01825_new_type_json_distributed.reference b/tests/queries/0_stateless/01825_new_type_json_distributed.reference new file mode 100644 index 00000000000..b2cbe847542 --- /dev/null +++ b/tests/queries/0_stateless/01825_new_type_json_distributed.reference @@ -0,0 +1,4 @@ +{"k1":"2","k2":{"k3":"qqq","k4":["44","55"]}} {'k1':'Int64','k2.k3':'String','k2.k4':'Array(Nullable(Int64))'} +{"k1":"2","k2":{"k3":"qqq","k4":["44","55"]}} {'k1':'Int64','k2.k3':'String','k2.k4':'Array(Nullable(Int64))'} +2 qqq [44,55] +2 qqq [44,55] diff --git a/tests/queries/0_stateless/01825_new_type_json_distributed.sql b/tests/queries/0_stateless/01825_new_type_json_distributed.sql new file mode 100644 index 00000000000..0fede046927 --- /dev/null +++ b/tests/queries/0_stateless/01825_new_type_json_distributed.sql @@ -0,0 +1,18 @@ +-- Tags: no-fasttest + +SET allow_experimental_json_type = 1; + +DROP TABLE IF EXISTS t_json_local; +DROP TABLE IF EXISTS t_json_dist; + +CREATE TABLE t_json_local(data JSON) ENGINE = MergeTree ORDER BY tuple(); +CREATE TABLE t_json_dist AS t_json_local ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), t_json_local); + +INSERT INTO t_json_local FORMAT JSONAsObject {"k1": 2, "k2": {"k3": "qqq", "k4": [44, 55]}} +; + +SELECT data, JSONAllPathsWithTypes(data) FROM t_json_dist; +SELECT data.k1, data.k2.k3, data.k2.k4 FROM t_json_dist; + +DROP TABLE IF EXISTS t_json_local; +DROP TABLE IF EXISTS t_json_dist; diff --git a/tests/queries/0_stateless/01825_new_type_json_ephemeral.reference b/tests/queries/0_stateless/01825_new_type_json_ephemeral.reference new file mode 100644 index 00000000000..7efe8cea252 --- /dev/null +++ b/tests/queries/0_stateless/01825_new_type_json_ephemeral.reference @@ -0,0 +1 @@ +PushEvent some-repo {"actor":{"avatar_url":"https:\\/\\/avatars.githubusercontent.com\\/u\\/123213213?","display_login":"github-actions","gravatar_id":"","id":"123123123","login":"github-actions[bot]","url":"https:\\/\\/api.github.com\\/users\\/github-actions[bot]"},"created_at":"2022-01-04 07:00:00","repo":{"id":"1001001010101","name":"some-repo","url":"https:\\/\\/api.github.com\\/repos\\/some-repo"},"type":"PushEvent"} diff --git a/tests/queries/0_stateless/01825_new_type_json_ephemeral.sql b/tests/queries/0_stateless/01825_new_type_json_ephemeral.sql new file mode 100644 index 00000000000..4aaebfd326f --- /dev/null +++ b/tests/queries/0_stateless/01825_new_type_json_ephemeral.sql @@ -0,0 +1,18 @@ + +SET allow_experimental_json_type = 1; + +DROP TABLE IF EXISTS t_github_json; + +CREATE table t_github_json +( + event_type LowCardinality(String) DEFAULT JSONExtractString(message_raw, 'type'), + repo_name LowCardinality(String) DEFAULT JSONExtractString(message_raw, 'repo', 'name'), + message JSON DEFAULT empty(message_raw) ? '{}' : message_raw, + message_raw String EPHEMERAL +) ENGINE = MergeTree ORDER BY (event_type, repo_name); + +INSERT INTO t_github_json (message_raw) FORMAT JSONEachRow {"message_raw": "{\"type\":\"PushEvent\", \"created_at\": \"2022-01-04 07:00:00\", \"actor\":{\"avatar_url\":\"https://avatars.githubusercontent.com/u/123213213?\",\"display_login\":\"github-actions\",\"gravatar_id\":\"\",\"id\":123123123,\"login\":\"github-actions[bot]\",\"url\":\"https://api.github.com/users/github-actions[bot]\"},\"repo\":{\"id\":1001001010101,\"name\":\"some-repo\",\"url\":\"https://api.github.com/repos/some-repo\"}}"} + +SELECT * FROM t_github_json ORDER BY event_type, repo_name; + +DROP TABLE t_github_json; diff --git a/tests/queries/0_stateless/01825_new_type_json_ghdata.reference b/tests/queries/0_stateless/01825_new_type_json_ghdata.reference new file mode 100644 index 00000000000..ca2fb7e8ff9 --- /dev/null +++ b/tests/queries/0_stateless/01825_new_type_json_ghdata.reference @@ -0,0 +1,12 @@ +5000 +leonardomso/33-js-concepts 3 +ytdl-org/youtube-dl 3 +Bogdanp/neko 2 +bminossi/AllVideoPocsFromHackerOne 2 +disclose/diodata 2 +Commit 182 +chipeo345 119 +phanwi346 114 +Nicholas Piggin 95 +direwolf-github 49 +2 diff --git a/tests/queries/0_stateless/01825_new_type_json_ghdata.sh b/tests/queries/0_stateless/01825_new_type_json_ghdata.sh new file mode 100755 index 00000000000..f165223fb98 --- /dev/null +++ b/tests/queries/0_stateless/01825_new_type_json_ghdata.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-s3-storage, long +# ^ no-s3-storage: too memory hungry + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ghdata" +${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata (data JSON(max_dynamic_paths=100)) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'" --allow_experimental_json_type 1 + +cat $CUR_DIR/data_json/ghdata_sample.json | ${CLICKHOUSE_CLIENT} \ + --max_memory_usage 10G --query "INSERT INTO ghdata FORMAT JSONAsObject" + +${CLICKHOUSE_CLIENT} -q "SELECT count() FROM ghdata WHERE NOT ignore(*)" + +${CLICKHOUSE_CLIENT} -q \ +"SELECT data.repo.name, count() AS stars FROM ghdata \ + WHERE data.type = 'WatchEvent' GROUP BY data.repo.name ORDER BY stars DESC, data.repo.name LIMIT 5" + +${CLICKHOUSE_CLIENT} --enable_analyzer=1 -q \ +"SELECT data.payload.commits[].author.name AS name, count() AS c FROM ghdata \ + ARRAY JOIN data.payload.commits[].author.name \ + GROUP BY name ORDER BY c DESC, name LIMIT 5" + +${CLICKHOUSE_CLIENT} -q "SELECT max(data.payload.pull_request.assignees[].size0) FROM ghdata" + +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ghdata" diff --git a/tests/queries/0_stateless/01825_new_type_json_ghdata_insert_select.reference b/tests/queries/0_stateless/01825_new_type_json_ghdata_insert_select.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/01825_new_type_json_ghdata_insert_select.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/01825_new_type_json_ghdata_insert_select.sh b/tests/queries/0_stateless/01825_new_type_json_ghdata_insert_select.sh new file mode 100755 index 00000000000..b450e9827c2 --- /dev/null +++ b/tests/queries/0_stateless/01825_new_type_json_ghdata_insert_select.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-s3-storage, long, no-asan +# ^ no-s3-storage: it is memory-hungry, no-asan: too long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ghdata_2" +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ghdata_2_string" +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ghdata_2_from_string" + +${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata_2 (data JSON(max_dynamic_paths=100)) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'" --allow_experimental_json_type 1 +${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata_2_string (data String) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'" +${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata_2_from_string (data JSON(max_dynamic_paths=100)) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'" --allow_experimental_json_type 1 + +cat $CUR_DIR/data_json/ghdata_sample.json | ${CLICKHOUSE_CLIENT} --max_memory_usage 10G -q "INSERT INTO ghdata_2 FORMAT JSONAsObject" +cat $CUR_DIR/data_json/ghdata_sample.json | ${CLICKHOUSE_CLIENT} -q "INSERT INTO ghdata_2_string FORMAT JSONAsString" + +${CLICKHOUSE_CLIENT} --max_memory_usage 10G -q "INSERT INTO ghdata_2_from_string SELECT data FROM ghdata_2_string" + +${CLICKHOUSE_CLIENT} -q "SELECT \ + (SELECT mapSort(groupUniqArrayMap(JSONAllPathsWithTypes(data))), sum(cityHash64(toString(data))) FROM ghdata_2_from_string) = \ + (SELECT mapSort(groupUniqArrayMap(JSONAllPathsWithTypes(data))), sum(cityHash64(toString(data))) FROM ghdata_2)" + +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ghdata_2" +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ghdata_2_string" +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ghdata_2_from_string" diff --git a/tests/queries/0_stateless/01825_new_type_json_in_array.reference b/tests/queries/0_stateless/01825_new_type_json_in_array.reference new file mode 100644 index 00000000000..aa33d9a7413 --- /dev/null +++ b/tests/queries/0_stateless/01825_new_type_json_in_array.reference @@ -0,0 +1,30 @@ +{"id":1,"arr":[{"k1":"1","k2":{"k3":"2","k4":"3"}},{"k1":"2","k2":{"k5":"foo"}}]} +{"id":2,"arr":[{"k1":"3","k2":{"k3":"4","k4":"5"}}]} +1 [1,2] [2,NULL] [3,NULL] [NULL,'foo'] +2 [3] [4] [5] [NULL] +{"arr":{"k1":"1","k2":{"k3":"2","k4":"3"}}} +{"arr":{"k1":"2","k2":{"k5":"foo"}}} +{"arr":{"k1":"3","k2":{"k3":"4","k4":"5"}}} +('k1','Int64') +('k2.k3','Int64') +('k2.k4','Int64') +('k2.k5','String') +{"id":1,"arr":[{"k1":[{"k2":"aaa","k3":"bbb"},{"k2":"ccc"}]}]} +{"id":2,"arr":[{"k1":[{"k3":"ddd","k4":"10"},{"k4":"20"}],"k5":{"k6":"foo"}}]} +1 [['aaa','ccc']] [['bbb',NULL]] [[NULL,NULL]] [NULL] +2 [[NULL,NULL]] [['ddd',NULL]] [[10,20]] ['foo'] +{"k1":{"k2":"aaa","k3":"bbb"}} +{"k1":{"k2":"ccc"}} +{"k1":{"k3":"ddd","k4":"10"}} +{"k1":{"k4":"20"}} +('k2','String') +('k3','String') +('k4','Int64') +[['{"k2":"aaa","k3":"bbb"}','{"k2":"ccc"}']] +[['{"k3":"ddd","k4":"10"}','{"k4":"20"}']] +{"arr":[{"x":1}]} +{"arr":{"x":{"y":1},"t":{"y":2}}} +{"arr":[1,{"y":1}]} +{"arr":[2,{"y":2}]} +{"arr":[{"x":"aaa","y":["1","2","3"]}]} +{"arr":[{"x":1}]} diff --git a/tests/queries/0_stateless/01825_new_type_json_in_array.sql b/tests/queries/0_stateless/01825_new_type_json_in_array.sql new file mode 100644 index 00000000000..42ab1f64681 --- /dev/null +++ b/tests/queries/0_stateless/01825_new_type_json_in_array.sql @@ -0,0 +1,39 @@ +-- Tags: no-fasttest + +SET allow_experimental_json_type = 1; +SET allow_experimental_analyzer = 1; +DROP TABLE IF EXISTS t_json_array; + +CREATE TABLE t_json_array (id UInt32, arr Array(JSON)) ENGINE = MergeTree ORDER BY id; + +INSERT INTO t_json_array FORMAT JSONEachRow {"id": 1, "arr": [{"k1": 1, "k2": {"k3": 2, "k4": 3}}, {"k1": 2, "k2": {"k5": "foo"}}]} + +INSERT INTO t_json_array FORMAT JSONEachRow {"id": 2, "arr": [{"k1": 3, "k2": {"k3": 4, "k4": 5}}]} + + +SELECT * FROM t_json_array ORDER BY id FORMAT JSONEachRow; +SELECT id, arr.k1, arr.k2.k3, arr.k2.k4, arr.k2.k5 FROM t_json_array ORDER BY id; +SELECT arr FROM t_json_array ARRAY JOIN arr ORDER BY arr.k1 FORMAT JSONEachRow; +SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(arrayJoin(arr))) as path FROM t_json_array order by path; + +TRUNCATE TABLE t_json_array; + +INSERT INTO t_json_array FORMAT JSONEachRow {"id": 1, "arr": [{"k1": [{"k2": "aaa", "k3": "bbb"}, {"k2": "ccc"}]}]} + +INSERT INTO t_json_array FORMAT JSONEachRow {"id": 2, "arr": [{"k1": [{"k3": "ddd", "k4": 10}, {"k4": 20}], "k5": {"k6": "foo"}}]} + +SELECT * FROM t_json_array ORDER BY id FORMAT JSONEachRow; +SELECT id, arr.k1[].k2, arr.k1[].k3, arr.k1[].k4, arr.k5.k6 FROM t_json_array ORDER BY id; + +SELECT arrayJoin(arrayJoin(arr.k1[])) AS k1 FROM t_json_array ORDER BY toString(k1) FORMAT JSONEachRow; +SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(arrayJoin(arrayJoin(arr.k1[])))) AS path FROM t_json_array order by path; + +SELECT arr.k1 FROM t_json_array GROUP BY arr.k1 ORDER BY toString(arr.k1); + +DROP TABLE t_json_array; + +SELECT * FROM values('arr Array(JSON)', '[\'{"x" : 1}\']') FORMAT JSONEachRow; +SELECT * FROM values('arr Map(String, JSON)', '{\'x\' : \'{"y" : 1}\', \'t\' : \'{"y" : 2}\'}') FORMAT JSONEachRow; +SELECT * FROM values('arr Tuple(Int32, JSON)', '(1, \'{"y" : 1}\')', '(2, \'{"y" : 2}\')') FORMAT JSONEachRow; +SELECT * FROM format(JSONEachRow, '{"arr" : [{"x" : "aaa", "y" : [1,2,3]}]}') FORMAT JSONEachRow; +SELECT * FROM values('arr Array(JSON)', '[\'{"x" : 1}\']') FORMAT JSONEachRow; diff --git a/tests/queries/0_stateless/01825_new_type_json_in_other_types.reference b/tests/queries/0_stateless/01825_new_type_json_in_other_types.reference new file mode 100644 index 00000000000..03913e5098e --- /dev/null +++ b/tests/queries/0_stateless/01825_new_type_json_in_other_types.reference @@ -0,0 +1,17 @@ +Tuple(String, Map(String, Array(JSON)), JSON) +============= +{"id":1,"data":["foo",{"aa":[{"k1":[{"k2":"1","k3":"2"},{"k3":"3"}]},{"k1":[{"k2":"4"},{"k3":"5"},{"k2":"6"}],"k4":"qqq"}],"bb":[{"k4":"www"},{"k1":[{"k2":"7","k3":"8"},{"k2":"9","k3":"10"},{"k2":"11","k3":"12"}]}]},{"k1":"aa","k2":{"k3":"bb","k4":"c"}}]} +{"id":2,"data":["bar",{"aa":[{"k1":[{"k2":"13","k3":"14"},{"k2":"15","k3":"16"}],"k4":"www"}]},{}]} +{"id":3,"data":["some",{"aa":[{"k1":[{"k3":"20","k5":"some"}]}]},{"k1":"eee"}]} +============= +{"aa":[{"k1":[{"k2":"1","k3":"2"},{"k3":"3"}]},{"k1":[{"k2":"4"},{"k3":"5"},{"k2":"6"}],"k4":"qqq"}],"bb":[{"k4":"www"},{"k1":[{"k2":"7","k3":"8"},{"k2":"9","k3":"10"},{"k2":"11","k3":"12"}]}]} +{"aa":[{"k1":[{"k2":"13","k3":"14"},{"k2":"15","k3":"16"}],"k4":"www"}],"bb":[]} +{"aa":[{"k1":[{"k3":"20","k5":"some"}]}],"bb":[]} +============= +{"k1":[[{"k2":"1","k3":"2"},{"k3":"3"}],[{"k2":"4"},{"k3":"5"},{"k2":"6"}]],"k4":[null,"qqq"]} +{"k1":[[{"k2":"13","k3":"14"},{"k2":"15","k3":"16"}]],"k4":["www"]} +{"k1":[[{"k3":"20","k5":"some"}]],"k4":[null]} +============= +{"obj":{"k1":"aa","k2":{"k3":"bb","k4":"c"}}} +{"obj":{}} +{"obj":{"k1":"eee"}} diff --git a/tests/queries/0_stateless/01825_new_type_json_in_other_types.sh b/tests/queries/0_stateless/01825_new_type_json_in_other_types.sh new file mode 100755 index 00000000000..1c7b64c73a1 --- /dev/null +++ b/tests/queries/0_stateless/01825_new_type_json_in_other_types.sh @@ -0,0 +1,90 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_json_nested" + +${CLICKHOUSE_CLIENT} -q " + CREATE TABLE t_json_nested + ( + id UInt32, + data Tuple(String, Map(String, Array(JSON)), JSON) + ) + ENGINE = MergeTree ORDER BY id" --allow_experimental_json_type 1 + +cat < 1; + +SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(data)) AS path FROM type_json_dst ORDER BY path; +SELECT id, data FROM type_json_dst ORDER BY id; + +INSERT INTO type_json_dst VALUES (4, '{"arr": [{"k11": 5, "k22": 6}, {"k11": 7, "k33": 8}]}'); + +INSERT INTO type_json_src VALUES (5, '{"arr": "not array"}'); + +INSERT INTO type_json_dst SELECT * FROM type_json_src WHERE id = 5; + +TRUNCATE TABLE type_json_src; +INSERT INTO type_json_src VALUES (6, '{"arr": [{"k22": "str1"}]}'); + +INSERT INTO type_json_dst SELECT * FROM type_json_src WHERE id = 5; + +SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(data)) AS path FROM type_json_dst ORDER BY path; +SELECT id, data FROM type_json_dst ORDER BY id; + +DROP TABLE type_json_src; +DROP TABLE type_json_dst; + +CREATE TABLE type_json_dst (data JSON) ENGINE = MergeTree ORDER BY tuple(); +CREATE TABLE type_json_src (data String) ENGINE = MergeTree ORDER BY tuple(); + +SYSTEM STOP MERGES type_json_src; + +SET max_threads = 1; +SET max_insert_threads = 1; +SET output_format_json_named_tuples_as_objects = 1; + +INSERT INTO type_json_src FORMAT JSONAsString {"k1": 1, "k10": [{"a": "1", "b": "2"}, {"a": "2", "b": "3"}]}; + +INSERT INTO type_json_src FORMAT JSONAsString {"k1": 2, "k10": [{"a": "1", "b": "2", "c": {"k11": "haha"}}]}; + +INSERT INTO type_json_dst SELECT data FROM type_json_src; + +SELECT * FROM type_json_dst ORDER BY data.k1 FORMAT JSONEachRow; +SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(data)) AS path FROM type_json_dst ORDER BY path; + +TRUNCATE TABLE type_json_src; +TRUNCATE TABLE type_json_dst; + +INSERT INTO type_json_src FORMAT JSONAsString {"k1": 2, "k10": [{"a": "1", "b": "2", "c": {"k11": "haha"}}]}; + +INSERT INTO type_json_src FORMAT JSONAsString {"k1": 1, "k10": [{"a": "1", "b": "2"}, {"a": "2", "b": "3"}]}; + +INSERT INTO type_json_dst SELECT data FROM type_json_src; + +SELECT * FROM type_json_dst ORDER BY data.k1 FORMAT JSONEachRow; +SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(data)) AS path FROM type_json_dst ORDER BY path; + +DROP TABLE type_json_src; +DROP TABLE type_json_dst; diff --git a/tests/queries/0_stateless/01825_new_type_json_missed_values.reference b/tests/queries/0_stateless/01825_new_type_json_missed_values.reference new file mode 100644 index 00000000000..952b5652bc1 --- /dev/null +++ b/tests/queries/0_stateless/01825_new_type_json_missed_values.reference @@ -0,0 +1,4 @@ +('foo','Int64') +('k1','Int64') +('k2','Int64') +1 diff --git a/tests/queries/0_stateless/01825_new_type_json_missed_values.sql b/tests/queries/0_stateless/01825_new_type_json_missed_values.sql new file mode 100644 index 00000000000..84bd8a19c18 --- /dev/null +++ b/tests/queries/0_stateless/01825_new_type_json_missed_values.sql @@ -0,0 +1,19 @@ +-- Tags: no-fasttest + +DROP TABLE IF EXISTS t_json; + +SET allow_experimental_json_type = 1; + +CREATE TABLE t_json(id UInt64, obj JSON) +ENGINE = MergeTree ORDER BY id +SETTINGS min_bytes_for_wide_part = 0; + +SYSTEM STOP MERGES t_json; + +INSERT INTO t_json SELECT number, '{"k1": 1, "k2": 2}' FROM numbers(1000000); +INSERT INTO t_json VALUES (1000001, '{"foo": 1}'); + +SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(obj)) AS path FROM t_json ORDER BY path; +SELECT count() FROM t_json WHERE obj.foo IS NOT NULL; + +DROP TABLE IF EXISTS t_json; diff --git a/tests/queries/0_stateless/01825_new_type_json_multiple_files.reference b/tests/queries/0_stateless/01825_new_type_json_multiple_files.reference new file mode 100644 index 00000000000..63c12792c17 --- /dev/null +++ b/tests/queries/0_stateless/01825_new_type_json_multiple_files.reference @@ -0,0 +1,22 @@ +{"data":{"k0":"100"}} +{"data":{"k1":"100"}} +{"data":{"k2":"100"}} +{"data":{"k3":"100"}} +{"data":{"k4":"100"}} +{"data":{"k5":"100"}} +('k0','Int64') +('k1','Int64') +('k2','Int64') +('k3','Int64') +('k4','Int64') +('k5','Int64') +{"data":{"k0":"100"}} +{"data":{"k1":"100"}} +{"data":{"k2":"100"}} +('k0','Int64') +('k1','Int64') +('k2','Int64') +{"data":{"k1":"100"}} +{"data":{"k3":"100"}} +('k1','Int64') +('k3','Int64') diff --git a/tests/queries/0_stateless/01825_new_type_json_multiple_files.sh b/tests/queries/0_stateless/01825_new_type_json_multiple_files.sh new file mode 100755 index 00000000000..9cb37987628 --- /dev/null +++ b/tests/queries/0_stateless/01825_new_type_json_multiple_files.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +for f in "${USER_FILES_PATH:?}/${CLICKHOUSE_DATABASE}"_*.json; do + [ -e $f ] && rm $f +done + +for i in {0..5}; do + echo "{\"k$i\": 100}" > "$USER_FILES_PATH/${CLICKHOUSE_DATABASE}_$i.json" +done + +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_json_files" +${CLICKHOUSE_CLIENT} -q "CREATE TABLE t_json_files (file String, data JSON) ENGINE = MergeTree ORDER BY tuple()" --allow_experimental_json_type 1 + +${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_files SELECT _file, data FROM file('${CLICKHOUSE_DATABASE}_*.json', 'JSONAsObject', 'data JSON')" --allow_experimental_json_type 1 + +${CLICKHOUSE_CLIENT} -q "SELECT data FROM t_json_files ORDER BY file FORMAT JSONEachRow" --output_format_json_named_tuples_as_objects 1 +${CLICKHOUSE_CLIENT} -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(data)) AS path FROM t_json_files ORDER BY path" + +${CLICKHOUSE_CLIENT} -q "TRUNCATE TABLE IF EXISTS t_json_files" + +${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_files \ + SELECT _file, data FROM file('${CLICKHOUSE_DATABASE}_*.json', 'JSONAsObject', 'data JSON') \ + ORDER BY _file LIMIT 3" --max_threads 1 --min_insert_block_size_rows 1 --max_insert_block_size 1 --max_block_size 1 --allow_experimental_json_type 1 + +${CLICKHOUSE_CLIENT} -q "SELECT data FROM t_json_files ORDER BY file, data FORMAT JSONEachRow" --output_format_json_named_tuples_as_objects 1 +${CLICKHOUSE_CLIENT} -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(data)) AS path FROM t_json_files ORDER BY path" + +${CLICKHOUSE_CLIENT} -q "TRUNCATE TABLE IF EXISTS t_json_files" + +${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_files \ + SELECT _file, data FROM file('${CLICKHOUSE_DATABASE}_*.json', 'JSONAsObject', 'data JSON') \ + WHERE _file IN ('${CLICKHOUSE_DATABASE}_1.json', '${CLICKHOUSE_DATABASE}_3.json')" --allow_experimental_json_type 1 + +${CLICKHOUSE_CLIENT} -q "SELECT data FROM t_json_files ORDER BY file FORMAT JSONEachRow" --output_format_json_named_tuples_as_objects 1 +${CLICKHOUSE_CLIENT} -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(data)) AS path FROM t_json_files ORDER BY path" + +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_json_files" +rm "$USER_FILES_PATH"/${CLICKHOUSE_DATABASE}_*.json diff --git a/tests/queries/0_stateless/01825_new_type_json_mutations.reference b/tests/queries/0_stateless/01825_new_type_json_mutations.reference new file mode 100644 index 00000000000..c7523661a3b --- /dev/null +++ b/tests/queries/0_stateless/01825_new_type_json_mutations.reference @@ -0,0 +1,7 @@ +1 q {"k1":"1","k2":"2","k3":[{"k4":"aaa"},{"k4":"bbb"}]} +2 w {"k1":"3","k2":"4","k3":[{"k4":"ccc"}]} +3 e {"k1":"5","k2":"6"} +1 q {"k1":"1","k2":"2","k3":[{"k4":"aaa"},{"k4":"bbb"}]} +3 e {"k1":"5","k2":"6"} +1 foo +3 foo diff --git a/tests/queries/0_stateless/01825_new_type_json_mutations.sql b/tests/queries/0_stateless/01825_new_type_json_mutations.sql new file mode 100644 index 00000000000..77feee692d9 --- /dev/null +++ b/tests/queries/0_stateless/01825_new_type_json_mutations.sql @@ -0,0 +1,21 @@ +-- Tags: no-fasttest + +DROP TABLE IF EXISTS t_json_mutations; + +SET allow_experimental_json_type = 1; +SET output_format_json_named_tuples_as_objects = 1; +SET mutations_sync = 2; + +CREATE TABLE t_json_mutations(id UInt32, s String, obj JSON) ENGINE = MergeTree ORDER BY id; + +INSERT INTO t_json_mutations VALUES (1, 'q', '{"k1": 1, "k2": 2, "k3": [{"k4": "aaa"}, {"k4": "bbb"}]}'); +INSERT INTO t_json_mutations VALUES (2, 'w', '{"k1": 3, "k2": 4, "k3": [{"k4": "ccc"}]}'); +INSERT INTO t_json_mutations VALUES (3, 'e', '{"k1": 5, "k2": 6}'); + +SELECT * FROM t_json_mutations ORDER BY id; +ALTER TABLE t_json_mutations DELETE WHERE id = 2; +SELECT * FROM t_json_mutations ORDER BY id; +ALTER TABLE t_json_mutations DROP COLUMN s, DROP COLUMN obj, ADD COLUMN t String DEFAULT 'foo'; +SELECT * FROM t_json_mutations ORDER BY id; + +DROP TABLE t_json_mutations; diff --git a/tests/queries/0_stateless/01825_new_type_json_nbagames.reference b/tests/queries/0_stateless/01825_new_type_json_nbagames.reference new file mode 100644 index 00000000000..9be03136b68 --- /dev/null +++ b/tests/queries/0_stateless/01825_new_type_json_nbagames.reference @@ -0,0 +1,61 @@ +1000 +('_id.$oid','String') +('date.$date','String') +('teams','Array(JSON(max_dynamic_types=16, max_dynamic_paths=256))') +('abbreviation','String') +('city','String') +('home','Bool') +('name','String') +('players','Array(JSON(max_dynamic_types=8, max_dynamic_paths=64))') +('results.ast','Int64') +('results.blk','Int64') +('results.drb','Int64') +('results.fg','Int64') +('results.fg3','Int64') +('results.fg3_pct','String') +('results.fg3a','Int64') +('results.fg_pct','String') +('results.fga','Int64') +('results.ft','Int64') +('results.ft_pct','String') +('results.fta','Int64') +('results.mp','Int64') +('results.orb','Int64') +('results.pf','Int64') +('results.pts','Int64') +('results.stl','Int64') +('results.tov','Int64') +('results.trb','Int64') +('score','Int64') +('won','Int64') +Boston Celtics 70 +Los Angeles Lakers 64 +Milwaukee Bucks 61 +Philadelphia 76ers 57 +Atlanta Hawks 55 +('ast','Int64') +('blk','Int64') +('drb','Int64') +('fg','Int64') +('fg3','Int64') +('fg3_pct','String') +('fg3a','Int64') +('fg_pct','String') +('fga','Int64') +('ft','Int64') +('ft_pct','String') +('fta','Int64') +('mp','String') +('orb','Int64') +('pf','Int64') +('player','String') +('pts','Int64') +('stl','Int64') +('tov','Int64') +('trb','Int64') +Larry Bird 10 +Clyde Drexler 4 +Alvin Robertson 3 +Magic Johnson 3 +Charles Barkley 2 +1 diff --git a/tests/queries/0_stateless/01825_new_type_json_nbagames.sh b/tests/queries/0_stateless/01825_new_type_json_nbagames.sh new file mode 100755 index 00000000000..20eba88eda4 --- /dev/null +++ b/tests/queries/0_stateless/01825_new_type_json_nbagames.sh @@ -0,0 +1,54 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS nbagames" +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS nbagames_string" +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS nbagames_from_string" + +${CLICKHOUSE_CLIENT} -q "CREATE TABLE nbagames (data JSON) ENGINE = MergeTree ORDER BY tuple()" --allow_experimental_json_type 1 + +cat $CUR_DIR/data_json/nbagames_sample.json | ${CLICKHOUSE_CLIENT} -q "INSERT INTO nbagames FORMAT JSONAsObject" + +${CLICKHOUSE_CLIENT} -q "SELECT count() FROM nbagames WHERE NOT ignore(*)" +${CLICKHOUSE_CLIENT} -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(data)) as path from nbagames order by path" +${CLICKHOUSE_CLIENT} -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(arrayJoin(data.teams[]))) as path from nbagames order by path" + +${CLICKHOUSE_CLIENT} --allow_experimental_analyzer=1 -q \ + "SELECT teams.name.:String AS name, sum(teams.won.:Int64) AS wins FROM nbagames \ + ARRAY JOIN data.teams[] AS teams GROUP BY name \ + ORDER BY wins DESC LIMIT 5;" + +${CLICKHOUSE_CLIENT} -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(arrayJoin(arrayJoin(data.teams[].players[])))) as path from nbagames order by path" + +${CLICKHOUSE_CLIENT} --allow_experimental_analyzer=1 -q \ +"SELECT player, sum(triple_double) AS triple_doubles FROM \ +( \ + SELECT \ + arrayJoin(arrayJoin(data.teams[].players[])) as players, \ + players.player.:String as player, \ + ((players.pts.:Int64 >= 10) + \ + (players.ast.:Int64 >= 10) + \ + (players.blk.:Int64 >= 10) + \ + (players.stl.:Int64 >= 10) + \ + (players.trb.:Int64 >= 10)) >= 3 AS triple_double \ + from nbagames \ +) \ +GROUP BY player ORDER BY triple_doubles DESC, player LIMIT 5" + +${CLICKHOUSE_CLIENT} -q "CREATE TABLE nbagames_string (data String) ENGINE = MergeTree ORDER BY tuple()" +${CLICKHOUSE_CLIENT} -q "CREATE TABLE nbagames_from_string (data JSON) ENGINE = MergeTree ORDER BY tuple()" --allow_experimental_json_type 1 + +cat $CUR_DIR/data_json/nbagames_sample.json | ${CLICKHOUSE_CLIENT} -q "INSERT INTO nbagames_string FORMAT JSONAsString" +${CLICKHOUSE_CLIENT} -q "INSERT INTO nbagames_from_string SELECT data FROM nbagames_string" + +${CLICKHOUSE_CLIENT} -q "SELECT \ + (SELECT groupUniqArrayMap(JSONAllPathsWithTypes(data)), sum(cityHash64(toString(data))) FROM nbagames_from_string) = \ + (SELECT groupUniqArrayMap(JSONAllPathsWithTypes(data)), sum(cityHash64(toString(data))) FROM nbagames)" + +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS nbagames" +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS nbagames_string" +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS nbagames_from_string" diff --git a/tests/queries/0_stateless/01825_new_type_json_order_by.reference b/tests/queries/0_stateless/01825_new_type_json_order_by.reference new file mode 100644 index 00000000000..611d2835127 --- /dev/null +++ b/tests/queries/0_stateless/01825_new_type_json_order_by.reference @@ -0,0 +1,6 @@ +0 +0 +{"k":"v"} + +{"k":"v"} +{"k":"v"} diff --git a/tests/queries/0_stateless/01825_new_type_json_order_by.sql b/tests/queries/0_stateless/01825_new_type_json_order_by.sql new file mode 100644 index 00000000000..6b5fb40aed4 --- /dev/null +++ b/tests/queries/0_stateless/01825_new_type_json_order_by.sql @@ -0,0 +1,6 @@ +-- Tags: no-fasttest + +SET allow_experimental_json_type = 1; +SELECT dummy FROM system.one ORDER BY materialize('{"k":"v"}'::JSON); +SELECT dummy FROM system.one ORDER BY materialize('{"k":"v"}'::JSON), dummy; +SELECT materialize('{"k":"v"}'::JSON) SETTINGS extremes = 1; diff --git a/tests/queries/0_stateless/01825_new_type_json_parallel_insert.reference b/tests/queries/0_stateless/01825_new_type_json_parallel_insert.reference new file mode 100644 index 00000000000..7cf3855d684 --- /dev/null +++ b/tests/queries/0_stateless/01825_new_type_json_parallel_insert.reference @@ -0,0 +1 @@ +{'k1':['Int64'],'k2':['String']} 500000 diff --git a/tests/queries/0_stateless/01825_new_type_json_parallel_insert.sql b/tests/queries/0_stateless/01825_new_type_json_parallel_insert.sql new file mode 100644 index 00000000000..a8457ff4f15 --- /dev/null +++ b/tests/queries/0_stateless/01825_new_type_json_parallel_insert.sql @@ -0,0 +1,10 @@ +-- Tags: long +DROP TABLE IF EXISTS t_json_parallel; + +SET allow_experimental_json_type = 1, max_insert_threads = 20, max_threads = 20, min_insert_block_size_rows = 65536; +CREATE TABLE t_json_parallel (data JSON) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO t_json_parallel SELECT materialize('{"k1":1, "k2": "some"}') FROM numbers_mt(500000); +SELECT groupUniqArrayMap(JSONAllPathsWithTypes(data)), count() FROM t_json_parallel; + +DROP TABLE t_json_parallel; diff --git a/tests/queries/0_stateless/01825_new_type_json_partitions.reference b/tests/queries/0_stateless/01825_new_type_json_partitions.reference new file mode 100644 index 00000000000..c5839472132 --- /dev/null +++ b/tests/queries/0_stateless/01825_new_type_json_partitions.reference @@ -0,0 +1,2 @@ +{"id":1,"obj":{"k1":"v1"}} +{"id":2,"obj":{"k2":"v2"}} diff --git a/tests/queries/0_stateless/01825_new_type_json_partitions.sql b/tests/queries/0_stateless/01825_new_type_json_partitions.sql new file mode 100644 index 00000000000..d1f37dedded --- /dev/null +++ b/tests/queries/0_stateless/01825_new_type_json_partitions.sql @@ -0,0 +1,14 @@ +-- Tags: no-fasttest + +DROP TABLE IF EXISTS t_json_partitions; + +SET allow_experimental_json_type = 1; + +CREATE TABLE t_json_partitions (id UInt32, obj JSON) +ENGINE MergeTree ORDER BY id PARTITION BY id; + +INSERT INTO t_json_partitions FORMAT JSONEachRow {"id": 1, "obj": {"k1": "v1"}} {"id": 2, "obj": {"k2": "v2"}}; + +SELECT * FROM t_json_partitions ORDER BY id FORMAT JSONEachRow; + +DROP TABLE t_json_partitions; diff --git a/tests/queries/0_stateless/01825_type_json_10.sql b/tests/queries/0_stateless/01825_type_json_10.sql index e13026770f6..3ddbf85ba63 100644 --- a/tests/queries/0_stateless/01825_type_json_10.sql +++ b/tests/queries/0_stateless/01825_type_json_10.sql @@ -4,7 +4,7 @@ SET allow_experimental_object_type = 1; SET output_format_json_named_tuples_as_objects = 1; DROP TABLE IF EXISTS t_json_10; -CREATE TABLE t_json_10 (o JSON) ENGINE = Memory; +CREATE TABLE t_json_10 (o Object('json')) ENGINE = Memory; INSERT INTO t_json_10 FORMAT JSONAsObject {"a": {"b": 1, "c": [{"d": 10, "e": [31]}, {"d": 20, "e": [63, 127]}]}} {"a": {"b": 2, "c": []}} diff --git a/tests/queries/0_stateless/01825_type_json_11.sh b/tests/queries/0_stateless/01825_type_json_11.sh index dbed15c8bb9..6109dff53a6 100755 --- a/tests/queries/0_stateless/01825_type_json_11.sh +++ b/tests/queries/0_stateless/01825_type_json_11.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t_json_11" -$CLICKHOUSE_CLIENT -q "CREATE TABLE t_json_11 (obj JSON) ENGINE = MergeTree ORDER BY tuple()" --allow_experimental_object_type 1 +$CLICKHOUSE_CLIENT -q "CREATE TABLE t_json_11 (obj Object('json')) ENGINE = MergeTree ORDER BY tuple()" --allow_experimental_object_type 1 cat < concat(toString(x),'Id'), range_arr) as key, arrayMap(x -> rand() % 8, range_arr) as val, arrayStringConcat(arrayMap((x,y) -> concat(x,'=',toString(y)), key, val),',') as str SELECT str FROM numbers(500000); + ALTER TABLE test_extract ADD COLUMN 15Id Nullable(UInt16) DEFAULT $sql;" + +function test() +{ + # Execute two queries and compare if they have similar memory usage: + # The first query uses the default column value, while the second explicitly uses the same SQL as the default value. + # Follow https://github.com/ClickHouse/ClickHouse/issues/17317 for more info about the issue + where=$1 + + uuid_1=$(cat /proc/sys/kernel/random/uuid) + $CLICKHOUSE_CLIENT --query="SELECT uniq(15Id) FROM test_extract $where SETTINGS max_threads=1" --query_id=$uuid_1 + uuid_2=$(cat /proc/sys/kernel/random/uuid) + $CLICKHOUSE_CLIENT --query="SELECT uniq($sql) FROM test_extract $where SETTINGS max_threads=1" --query_id=$uuid_2 + $CLICKHOUSE_CLIENT --query=" + SYSTEM FLUSH LOGS; + WITH memory_1 AS (SELECT memory_usage FROM system.query_log WHERE current_database = currentDatabase() AND query_id='$uuid_1' AND type = 'QueryFinish' as memory_1), + memory_2 AS (SELECT memory_usage FROM system.query_log WHERE current_database = currentDatabase() AND query_id='$uuid_2' AND type = 'QueryFinish' as memory_2) + SELECT memory_1.memory_usage <= 1.2 * memory_2.memory_usage OR + memory_2.memory_usage <= 1.2 * memory_1.memory_usage FROM memory_1, memory_2;" +} + +test "" +test "PREWHERE 15Id < 4" +test "WHERE 15Id < 4" diff --git a/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sql b/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sql deleted file mode 100644 index 2eec08635eb..00000000000 --- a/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sql +++ /dev/null @@ -1,13 +0,0 @@ --- Tags: no-random-merge-tree-settings - -CREATE TABLE test_extract(str String, arr Array(Array(String)) ALIAS extractAllGroupsHorizontal(str, '\\W(\\w+)=("[^"]*?"|[^",}]*)')) ENGINE=MergeTree() PARTITION BY tuple() ORDER BY tuple(); - -INSERT INTO test_extract (str) WITH range(8) as range_arr, arrayMap(x-> concat(toString(x),'Id'), range_arr) as key, arrayMap(x -> rand() % 8, range_arr) as val, arrayStringConcat(arrayMap((x,y) -> concat(x,'=',toString(y)), key, val),',') as str SELECT str FROM numbers(500000); - -ALTER TABLE test_extract ADD COLUMN `15Id` Nullable(UInt16) DEFAULT toUInt16OrNull(arrayFirst((v, k) -> (k = '4Id'), arr[2], arr[1])); - -SELECT uniq(15Id) FROM test_extract SETTINGS max_threads=1, max_memory_usage=100000000; - -SELECT uniq(15Id) FROM test_extract PREWHERE 15Id < 4 SETTINGS max_threads=1, max_memory_usage=100000000; - -SELECT uniq(15Id) FROM test_extract WHERE 15Id < 4 SETTINGS max_threads=1, max_memory_usage=100000000; diff --git a/tests/queries/0_stateless/01903_ssd_cache_dictionary_array_type.sh b/tests/queries/0_stateless/01903_ssd_cache_dictionary_array_type.sh index 853445daf3f..a44106414ea 100755 --- a/tests/queries/0_stateless/01903_ssd_cache_dictionary_array_type.sh +++ b/tests/queries/0_stateless/01903_ssd_cache_dictionary_array_type.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" DROP TABLE IF EXISTS dictionary_array_source_table; CREATE TABLE dictionary_array_source_table ( diff --git a/tests/queries/0_stateless/01904_ssd_cache_dictionary_default_nullable_type.sh b/tests/queries/0_stateless/01904_ssd_cache_dictionary_default_nullable_type.sh index 0b555cf82c2..a5c65ca87a7 100755 --- a/tests/queries/0_stateless/01904_ssd_cache_dictionary_default_nullable_type.sh +++ b/tests/queries/0_stateless/01904_ssd_cache_dictionary_default_nullable_type.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" DROP TABLE IF EXISTS dictionary_nullable_source_table; CREATE TABLE dictionary_nullable_source_table ( diff --git a/tests/queries/0_stateless/01927_query_views_log_matview_exceptions.sh b/tests/queries/0_stateless/01927_query_views_log_matview_exceptions.sh index 47d5e733480..608107c76d6 100755 --- a/tests/queries/0_stateless/01927_query_views_log_matview_exceptions.sh +++ b/tests/queries/0_stateless/01927_query_views_log_matview_exceptions.sh @@ -6,7 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) function cleanup() { - ${CLICKHOUSE_CLIENT} -n -q " + ${CLICKHOUSE_CLIENT} -q " DROP TABLE IF EXISTS matview_exception_a_to_c; DROP TABLE IF EXISTS matview_exception_a_to_b; DROP TABLE IF EXISTS table_exception_c; @@ -17,7 +17,7 @@ function cleanup() function setup() { - ${CLICKHOUSE_CLIENT} -n -q " + ${CLICKHOUSE_CLIENT} -q " CREATE TABLE table_exception_a (a String, b Int64) ENGINE = MergeTree ORDER BY b; CREATE TABLE table_exception_b (a Float64, b Int64) ENGINE = MergeTree ORDER BY tuple(); CREATE TABLE table_exception_c (a Float64) ENGINE = MergeTree ORDER BY a; diff --git a/tests/queries/0_stateless/01947_multiple_pipe_read.sh b/tests/queries/0_stateless/01947_multiple_pipe_read.sh index 06a18a55e6e..51709eb574e 100755 --- a/tests/queries/0_stateless/01947_multiple_pipe_read.sh +++ b/tests/queries/0_stateless/01947_multiple_pipe_read.sh @@ -11,7 +11,7 @@ cat "$SAMPLE_FILE" echo '******************' echo 'Read twice from a regular file' -${CLICKHOUSE_LOCAL} --structure 'x UInt64, s String' -n -q 'select * from table; select * from table;' --file "$SAMPLE_FILE" +${CLICKHOUSE_LOCAL} --structure 'x UInt64, s String' -q 'select * from table; select * from table;' --file "$SAMPLE_FILE" echo '---' ${CLICKHOUSE_LOCAL} --structure 'x UInt64, s String' -q 'select * from table WHERE x IN (select x from table);' --file "$SAMPLE_FILE" echo '---' @@ -19,7 +19,7 @@ ${CLICKHOUSE_LOCAL} --structure 'x UInt64, s String' -q 'select * from table UNI echo '******************' echo 'Read twice from file descriptor that corresponds to a regular file' -${CLICKHOUSE_LOCAL} --structure 'x UInt64, s String' -n -q 'select * from table; select * from table;' < "$SAMPLE_FILE" +${CLICKHOUSE_LOCAL} --structure 'x UInt64, s String' -q 'select * from table; select * from table;' < "$SAMPLE_FILE" echo '---' ${CLICKHOUSE_LOCAL} --structure 'x UInt64, s String' -q 'select * from table WHERE x IN (select x from table);' < "$SAMPLE_FILE" echo '---' diff --git a/tests/queries/0_stateless/01961_roaring_memory_tracking.sql b/tests/queries/0_stateless/01961_roaring_memory_tracking.sql index 485c8192f69..22eb8e887f2 100644 --- a/tests/queries/0_stateless/01961_roaring_memory_tracking.sql +++ b/tests/queries/0_stateless/01961_roaring_memory_tracking.sql @@ -2,5 +2,5 @@ SET max_bytes_before_external_group_by = 0; -SET max_memory_usage = '100M'; +SET max_memory_usage = '100M', max_rows_to_read = '1G'; SELECT cityHash64(rand() % 1000) as n, groupBitmapState(number) FROM numbers_mt(200000000) GROUP BY n FORMAT Null; -- { serverError MEMORY_LIMIT_EXCEEDED } diff --git a/tests/queries/0_stateless/02003_memory_limit_in_client.sh b/tests/queries/0_stateless/02003_memory_limit_in_client.sh index 96028f4847a..32e8c32f009 100755 --- a/tests/queries/0_stateless/02003_memory_limit_in_client.sh +++ b/tests/queries/0_stateless/02003_memory_limit_in_client.sh @@ -4,21 +4,21 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_CLIENT --max_memory_usage_in_client=1 -n -q "SELECT arrayMap(x -> range(x), range(number)) FROM numbers(1000) -- { clientError MEMORY_LIMIT_EXCEEDED }" -$CLICKHOUSE_CLIENT --max_memory_usage_in_client=0 -n -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" +$CLICKHOUSE_CLIENT --max_result_bytes 0 --max_memory_usage_in_client=1 -q "SELECT arrayMap(x -> range(x), range(number)) FROM numbers(1000) -- { clientError MEMORY_LIMIT_EXCEEDED }" +$CLICKHOUSE_CLIENT --max_memory_usage_in_client=0 -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" -$CLICKHOUSE_CLIENT --max_memory_usage_in_client='5K' -n -q "SELECT arrayMap(x -> range(x), range(number)) FROM numbers(1000) -- { clientError MEMORY_LIMIT_EXCEEDED }" -$CLICKHOUSE_CLIENT --max_memory_usage_in_client='5k' -n -q "SELECT arrayMap(x -> range(x), range(number)) FROM numbers(1000) -- { clientError MEMORY_LIMIT_EXCEEDED }" -$CLICKHOUSE_CLIENT --max_memory_usage_in_client='1M' -n -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" -$CLICKHOUSE_CLIENT --max_memory_usage_in_client='23G' -n -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" -$CLICKHOUSE_CLIENT --max_memory_usage_in_client='11T' -n -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" +$CLICKHOUSE_CLIENT --max_result_bytes 0 --max_memory_usage_in_client='5K' -q "SELECT arrayMap(x -> range(x), range(number)) FROM numbers(1000) -- { clientError MEMORY_LIMIT_EXCEEDED }" +$CLICKHOUSE_CLIENT --max_result_bytes 0 --max_memory_usage_in_client='5k' -q "SELECT arrayMap(x -> range(x), range(number)) FROM numbers(1000) -- { clientError MEMORY_LIMIT_EXCEEDED }" +$CLICKHOUSE_CLIENT --max_memory_usage_in_client='1M' -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" +$CLICKHOUSE_CLIENT --max_memory_usage_in_client='23G' -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" +$CLICKHOUSE_CLIENT --max_memory_usage_in_client='11T' -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" -$CLICKHOUSE_CLIENT --max_memory_usage_in_client='2P' -n -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" 2>&1 | grep -c -F "CANNOT_PARSE_INPUT_ASSERTION_FAILED" -$CLICKHOUSE_CLIENT --max_memory_usage_in_client='2.1p' -n -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" 2>&1 | grep -c -F "CANNOT_PARSE_INPUT_ASSERTION_FAILED" -$CLICKHOUSE_CLIENT --max_memory_usage_in_client='10E' -n -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" 2>&1 | grep -c -F "CANNOT_PARSE_INPUT_ASSERTION_FAILED" -$CLICKHOUSE_CLIENT --max_memory_usage_in_client='10.2e' -n -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" 2>&1 | grep -c -F "CANNOT_PARSE_INPUT_ASSERTION_FAILED" -$CLICKHOUSE_CLIENT --max_memory_usage_in_client='-1.1T' -n -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" 2>&1 | grep -c -F "CANNOT_PARSE_NUMBER" -$CLICKHOUSE_CLIENT --max_memory_usage_in_client='-1' -n -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" 2>&1 | grep -c -F "CANNOT_PARSE_NUMBER" -$CLICKHOUSE_CLIENT --max_memory_usage_in_client='1m' -n -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" 2>&1 | grep -c -F "CANNOT_PARSE_INPUT_ASSERTION_FAILED" -$CLICKHOUSE_CLIENT --max_memory_usage_in_client='14g' -n -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" 2>&1 | grep -c -F "CANNOT_PARSE_INPUT_ASSERTION_FAILED" -$CLICKHOUSE_CLIENT --max_memory_usage_in_client='11t' -n -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" 2>&1 | grep -c -F "CANNOT_PARSE_INPUT_ASSERTION_FAILED" +$CLICKHOUSE_CLIENT --max_memory_usage_in_client='2P' -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" 2>&1 | grep -c -F "CANNOT_PARSE_INPUT_ASSERTION_FAILED" +$CLICKHOUSE_CLIENT --max_memory_usage_in_client='2.1p' -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" 2>&1 | grep -c -F "CANNOT_PARSE_INPUT_ASSERTION_FAILED" +$CLICKHOUSE_CLIENT --max_memory_usage_in_client='10E' -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" 2>&1 | grep -c -F "CANNOT_PARSE_INPUT_ASSERTION_FAILED" +$CLICKHOUSE_CLIENT --max_memory_usage_in_client='10.2e' -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" 2>&1 | grep -c -F "CANNOT_PARSE_INPUT_ASSERTION_FAILED" +$CLICKHOUSE_CLIENT --max_memory_usage_in_client='-1.1T' -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" 2>&1 | grep -c -F "CANNOT_PARSE_NUMBER" +$CLICKHOUSE_CLIENT --max_memory_usage_in_client='-1' -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" 2>&1 | grep -c -F "CANNOT_PARSE_NUMBER" +$CLICKHOUSE_CLIENT --max_memory_usage_in_client='1m' -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" 2>&1 | grep -c -F "CANNOT_PARSE_INPUT_ASSERTION_FAILED" +$CLICKHOUSE_CLIENT --max_memory_usage_in_client='14g' -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" 2>&1 | grep -c -F "CANNOT_PARSE_INPUT_ASSERTION_FAILED" +$CLICKHOUSE_CLIENT --max_memory_usage_in_client='11t' -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" 2>&1 | grep -c -F "CANNOT_PARSE_INPUT_ASSERTION_FAILED" diff --git a/tests/queries/0_stateless/02020_alter_table_modify_comment.sh b/tests/queries/0_stateless/02020_alter_table_modify_comment.sh index 3448f052f51..fa2d84e131a 100755 --- a/tests/queries/0_stateless/02020_alter_table_modify_comment.sh +++ b/tests/queries/0_stateless/02020_alter_table_modify_comment.sh @@ -16,7 +16,7 @@ function test_table_comments() local ENGINE_NAME="$1" echo "engine : ${ENGINE_NAME}" - $CLICKHOUSE_CLIENT -nm <& /dev/null +$CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=-1 -q 'select 1; select 1' >& /dev/null echo $? echo 'regression test for overlap profile events snapshots between queries' -$CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=-1 -n -q 'select 1; select 1' |& grep -F -o '[ 0 ] SelectedRows: 1 (increment)' +$CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=-1 -q 'select 1; select 1' |& grep -F -o '[ 0 ] SelectedRows: 1 (increment)' echo 'regression test for overlap profile events snapshots between queries (clickhouse-local)' -$CLICKHOUSE_LOCAL --print-profile-events --profile-events-delay-ms=-1 -n -q 'select 1; select 1' |& grep -F -o '[ 0 ] SelectedRows: 1 (increment)' +$CLICKHOUSE_LOCAL --print-profile-events --profile-events-delay-ms=-1 -q 'select 1; select 1' |& grep -F -o '[ 0 ] SelectedRows: 1 (increment)' echo 'print everything' profile_events="$( @@ -35,5 +35,5 @@ profile_events="$( test "$profile_events" -gt 1 && echo OK || echo "FAIL ($profile_events)" echo 'check that ProfileEvents is new for each query' -sleep_function_calls=$($CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=-1 -n -q 'select sleep(1); select 1' |& grep -c 'SleepFunctionCalls') +sleep_function_calls=$($CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=-1 -q 'select sleep(1); select 1' |& grep -c 'SleepFunctionCalls') test "$sleep_function_calls" -eq 1 && echo OK || echo "FAIL ($sleep_function_calls)" diff --git a/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.reference b/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.reference index a3bac432482..deabef61a88 100644 --- a/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.reference +++ b/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.reference @@ -5,9 +5,9 @@ insert into utf8_overlap values ('\xe2'), ('Foo⚊BarBazBam'), ('\xe2'), ('Foo -- MONOGRAM FOR YANG with lowerUTF8(str) as l_, upperUTF8(str) as u_, '0x' || hex(str) as h_ select length(str), if(l_ == '\xe2', h_, l_), if(u_ == '\xe2', h_, u_) from utf8_overlap format CSV; -1,"0xE2","0xE2" +1,"�","�" 15,"foo⚊barbazbam","FOO⚊BARBAZBAM" -1,"0xE2","0xE2" +1,"�","�" 15,"foo⚊barbazbam","FOO⚊BARBAZBAM" -- NOTE: regression test for introduced bug -- https://github.com/ClickHouse/ClickHouse/issues/42756 diff --git a/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.sql b/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.sql index 8ca0a3f5f75..d175e0659d0 100644 --- a/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.sql +++ b/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.sql @@ -1,3 +1,6 @@ +-- Tags: no-fasttest +-- no-fasttest: upper/lowerUTF8 use ICU + drop table if exists utf8_overlap; create table utf8_overlap (str String) engine=Memory(); diff --git a/tests/queries/0_stateless/02099_tsv_raw_format_1.sh b/tests/queries/0_stateless/02099_tsv_raw_format_1.sh index a3468f46ca0..bd1f8731717 100755 --- a/tests/queries/0_stateless/02099_tsv_raw_format_1.sh +++ b/tests/queries/0_stateless/02099_tsv_raw_format_1.sh @@ -1,6 +1,9 @@ #!/usr/bin/env bash # Tags: long +# This test was split in two due to long runtimes in sanitizers. +# The other part is 02099_tsv_raw_format_2.sh. + CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/02099_tsv_raw_format_2.sh b/tests/queries/0_stateless/02099_tsv_raw_format_2.sh index d6034a0616f..9f57eea42f2 100755 --- a/tests/queries/0_stateless/02099_tsv_raw_format_2.sh +++ b/tests/queries/0_stateless/02099_tsv_raw_format_2.sh @@ -1,6 +1,9 @@ #!/usr/bin/env bash # Tags: long +# This test was split in two due to long runtimes in sanitizers. +# The other part is 02099_tsv_raw_format_1.sh. +# CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index cfae4fee6c2..638a46a142f 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -52,6 +52,8 @@ CREATE TABLE system.clusters `database_shard_name` String, `database_replica_name` String, `is_active` Nullable(UInt8), + `replication_lag` Nullable(UInt32), + `recovery_time` Nullable(UInt64), `name` String ALIAS cluster ) ENGINE = SystemClusters @@ -508,9 +510,15 @@ CREATE TABLE system.parts `rows_where_ttl_info.max` Array(DateTime), `projections` Array(String), `visible` UInt8, - `creation_tid` Tuple(UInt64, UInt64, UUID), + `creation_tid` Tuple( + UInt64, + UInt64, + UUID), `removal_tid_lock` UInt64, - `removal_tid` Tuple(UInt64, UInt64, UUID), + `removal_tid` Tuple( + UInt64, + UInt64, + UUID), `creation_csn` UInt64, `removal_csn` UInt64, `has_lightweight_delete` UInt8, diff --git a/tests/queries/0_stateless/02122_join_group_by_timeout.reference b/tests/queries/0_stateless/02122_join_group_by_timeout.reference index f314e22e519..6500560e8fc 100644 --- a/tests/queries/0_stateless/02122_join_group_by_timeout.reference +++ b/tests/queries/0_stateless/02122_join_group_by_timeout.reference @@ -1,4 +1,6 @@ -Code: 159 -0 +Code: 159 +query_duration 1 +0 +query_duration 1 Code: 159 0 diff --git a/tests/queries/0_stateless/02122_join_group_by_timeout.sh b/tests/queries/0_stateless/02122_join_group_by_timeout.sh index 8380c5dbd0c..0e89fcf56d9 100755 --- a/tests/queries/0_stateless/02122_join_group_by_timeout.sh +++ b/tests/queries/0_stateless/02122_join_group_by_timeout.sh @@ -1,27 +1,24 @@ #!/usr/bin/env bash -# Tags: no-debug - -# no-debug: Query is canceled by timeout after max_execution_time, -# but sending an exception to the client may hang -# for more than MAX_PROCESS_WAIT seconds in a slow debug build, -# and test will fail. CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -MAX_PROCESS_WAIT=5 - -IS_SANITIZER=$($CLICKHOUSE_CLIENT -q "SELECT count() FROM system.warnings WHERE message like '%built with sanitizer%'") -if [ "$IS_SANITIZER" -gt 0 ]; then - # Query may hang for more than 5 seconds, especially in tsan build - MAX_PROCESS_WAIT=15 +TIMEOUT=5 +IS_SANITIZER_OR_DEBUG=$($CLICKHOUSE_CLIENT -q "SELECT count() FROM system.warnings WHERE message like '%built with sanitizer%' or message like '%built in debug mode%'") +if [ "$IS_SANITIZER_OR_DEBUG" -gt 0 ]; then + # Increase the timeout due to in debug/sanitizers build: + # - client is slow + # - stacktrace resolving is slow + TIMEOUT=15 fi # TCP CLIENT: As of today (02/12/21) uses PullingAsyncPipelineExecutor ### Should be cancelled after 1 second and return a 159 exception (timeout) -timeout -s KILL $MAX_PROCESS_WAIT $CLICKHOUSE_CLIENT --max_execution_time 1 -q \ - "SELECT * FROM +### However, in the test, the server can be overloaded, so we assert query duration in the interval of 1 to 60 seconds. +query_id=$(random_str 12) +$CLICKHOUSE_CLIENT --max_result_rows 0 --max_result_bytes 0 --query_id "$query_id" --max_execution_time 1 -q " + SELECT * FROM ( SELECT a.name as n FROM @@ -34,28 +31,35 @@ timeout -s KILL $MAX_PROCESS_WAIT $CLICKHOUSE_CLIENT --max_execution_time 1 -q \ GROUP BY n ) LIMIT 20 - FORMAT Null" 2>&1 | grep -o "Code: 159" | sort | uniq + FORMAT Null +" 2>&1 | grep -m1 -o "Code: 159" +$CLICKHOUSE_CLIENT -q "system flush logs" +${CLICKHOUSE_CURL} -q -sS "$CLICKHOUSE_URL" -d "select 'query_duration', round(query_duration_ms/1000) BETWEEN 1 AND 60 from system.query_log where current_database = '$CLICKHOUSE_DATABASE' and query_id = '$query_id' and type != 'QueryStart'" + ### Should stop pulling data and return what has been generated already (return code 0) -timeout -s KILL $MAX_PROCESS_WAIT $CLICKHOUSE_CLIENT -q \ - "SELECT a.name as n - FROM - ( - SELECT 'Name' as name, number FROM system.numbers LIMIT 2000000 - ) AS a, - ( - SELECT 'Name' as name2, number FROM system.numbers LIMIT 2000000 - ) as b - FORMAT Null - SETTINGS max_execution_time = 1, timeout_overflow_mode = 'break' - " +query_id=$(random_str 12) +$CLICKHOUSE_CLIENT --max_result_rows 0 --max_result_bytes 0 --query_id "$query_id" -q " + SELECT a.name as n + FROM + ( + SELECT 'Name' as name, number FROM system.numbers LIMIT 2000000 + ) AS a, + ( + SELECT 'Name' as name2, number FROM system.numbers LIMIT 2000000 + ) as b + FORMAT Null + SETTINGS max_execution_time = 1, timeout_overflow_mode = 'break' +" echo $? +$CLICKHOUSE_CLIENT -q "system flush logs" +${CLICKHOUSE_CURL} -q -sS "$CLICKHOUSE_URL" -d "select 'query_duration', round(query_duration_ms/1000) BETWEEN 1 AND 60 from system.query_log where current_database = '$CLICKHOUSE_DATABASE' and query_id = '$query_id' and type != 'QueryStart'" # HTTP CLIENT: As of today (02/12/21) uses PullingPipelineExecutor ### Should be cancelled after 1 second and return a 159 exception (timeout) -${CLICKHOUSE_CURL} -q --max-time $MAX_PROCESS_WAIT -sS "$CLICKHOUSE_URL&max_execution_time=1" -d \ - "SELECT * FROM +${CLICKHOUSE_CURL} -q --max-time $TIMEOUT -sS "$CLICKHOUSE_URL&max_execution_time=1&max_result_rows=0&max_result_bytes=0" -d " + SELECT * FROM ( SELECT a.name as n FROM @@ -68,12 +72,13 @@ ${CLICKHOUSE_CURL} -q --max-time $MAX_PROCESS_WAIT -sS "$CLICKHOUSE_URL&max_exec GROUP BY n ) LIMIT 20 - FORMAT Null" 2>&1 | grep -o "Code: 159" | sort | uniq + FORMAT Null +" 2>&1 | grep -o "Code: 159" | sort | uniq ### Should stop pulling data and return what has been generated already (return code 0) -${CLICKHOUSE_CURL} -q --max-time $MAX_PROCESS_WAIT -sS "$CLICKHOUSE_URL" -d \ - "SELECT a.name as n +${CLICKHOUSE_CURL} -q --max-time $TIMEOUT -sS "$CLICKHOUSE_URL&max_result_rows=0&max_result_bytes=0" -d " + SELECT a.name as n FROM ( SELECT 'Name' as name, number FROM system.numbers LIMIT 2000000 @@ -83,5 +88,5 @@ ${CLICKHOUSE_CURL} -q --max-time $MAX_PROCESS_WAIT -sS "$CLICKHOUSE_URL" -d \ ) as b FORMAT Null SETTINGS max_execution_time = 1, timeout_overflow_mode = 'break' - " +" echo $? diff --git a/tests/queries/0_stateless/02136_kill_scalar_queries.sh b/tests/queries/0_stateless/02136_kill_scalar_queries.sh index c8691b62360..f8bd5a42756 100755 --- a/tests/queries/0_stateless/02136_kill_scalar_queries.sh +++ b/tests/queries/0_stateless/02136_kill_scalar_queries.sh @@ -10,7 +10,7 @@ function wait_for_query_to_start() } QUERY_1_ID="${CLICKHOUSE_DATABASE}_TEST02132KILL_QUERY1" -(${CLICKHOUSE_CLIENT} --query_id="${QUERY_1_ID}" --query='select (SELECT max(number) from system.numbers) + 1;' 2>&1 | grep -q "Code: 394." || echo 'FAIL') & +(${CLICKHOUSE_CLIENT} --max_rows_to_read 0 --query_id="${QUERY_1_ID}" --query='select (SELECT max(number) from system.numbers) + 1;' 2>&1 | grep -q "Code: 394." || echo 'FAIL') & wait_for_query_to_start "${QUERY_1_ID}" ${CLICKHOUSE_CLIENT} --query="KILL QUERY WHERE query_id='${QUERY_1_ID}' SYNC" diff --git a/tests/queries/0_stateless/02136_scalar_progress.reference b/tests/queries/0_stateless/02136_scalar_progress.reference index 5378c52de89..b8957f78e6d 100644 --- a/tests/queries/0_stateless/02136_scalar_progress.reference +++ b/tests/queries/0_stateless/02136_scalar_progress.reference @@ -1,6 +1,7 @@ < X-ClickHouse-Progress: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"100000","result_rows":"0","result_bytes":"0"} < X-ClickHouse-Progress: {"read_rows":"65505","read_bytes":"524040","written_rows":"0","written_bytes":"0","total_rows_to_read":"100000","result_rows":"0","result_bytes":"0"} < X-ClickHouse-Progress: {"read_rows":"100000","read_bytes":"800000","written_rows":"0","written_bytes":"0","total_rows_to_read":"100000","result_rows":"0","result_bytes":"0"} -< X-ClickHouse-Progress: {"read_rows":"100001","read_bytes":"800001","written_rows":"0","written_bytes":"0","total_rows_to_read":"100000","result_rows":"0","result_bytes":"0"} -< X-ClickHouse-Progress: {"read_rows":"100001","read_bytes":"800001","written_rows":"0","written_bytes":"0","total_rows_to_read":"100000","result_rows":"1","result_bytes":"272"} -< X-ClickHouse-Summary: {"read_rows":"100001","read_bytes":"800001","written_rows":"0","written_bytes":"0","total_rows_to_read":"100000","result_rows":"1","result_bytes":"272"} +< X-ClickHouse-Progress: {"read_rows":"100000","read_bytes":"800000","written_rows":"0","written_bytes":"0","total_rows_to_read":"100001","result_rows":"0","result_bytes":"0"} +< X-ClickHouse-Progress: {"read_rows":"100001","read_bytes":"800001","written_rows":"0","written_bytes":"0","total_rows_to_read":"100001","result_rows":"0","result_bytes":"0"} +< X-ClickHouse-Progress: {"read_rows":"100001","read_bytes":"800001","written_rows":"0","written_bytes":"0","total_rows_to_read":"100001","result_rows":"1","result_bytes":"272"} +< X-ClickHouse-Summary: {"read_rows":"100001","read_bytes":"800001","written_rows":"0","written_bytes":"0","total_rows_to_read":"100001","result_rows":"1","result_bytes":"272"} diff --git a/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.reference b/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.reference index 0bb8966cbe4..0e74c0a083e 100644 --- a/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.reference +++ b/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.reference @@ -1,2 +1,2 @@ -CREATE TABLE default.`table`\n(\n `key` String\n)\nENGINE = File(\'TSVWithNamesAndTypes\', \'/dev/null\') -CREATE TABLE foo.`table`\n(\n `key` String\n)\nENGINE = File(\'TSVWithNamesAndTypes\', \'/dev/null\') +CREATE TEMPORARY TABLE `table`\n(\n `key` String\n)\nENGINE = File(TSVWithNamesAndTypes, \'/dev/null\') +CREATE TEMPORARY TABLE `table`\n(\n `key` String\n)\nENGINE = File(TSVWithNamesAndTypes, \'/dev/null\') diff --git a/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.sh b/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.sh index 934d87616ac..3a95e59416a 100755 --- a/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.sh +++ b/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.sh @@ -4,5 +4,5 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_LOCAL --file /dev/null --structure "key String" --input-format TSVWithNamesAndTypes --interactive --send_logs_level=trace <<<'show create table table' -$CLICKHOUSE_LOCAL --database foo --file /dev/null --structure "key String" --input-format TSVWithNamesAndTypes --interactive --send_logs_level=trace <<<'show create table table' +$CLICKHOUSE_LOCAL --file /dev/null --structure "key String" --input-format TSVWithNamesAndTypes --interactive --send_logs_level=trace <<<'show create temporary table table' +$CLICKHOUSE_LOCAL --database foo --file /dev/null --structure "key String" --input-format TSVWithNamesAndTypes --interactive --send_logs_level=trace <<<'show create temporary table table' diff --git a/tests/queries/0_stateless/02151_lc_prefetch.sql b/tests/queries/0_stateless/02151_lc_prefetch.sql index c2b97231145..f8c76038120 100644 --- a/tests/queries/0_stateless/02151_lc_prefetch.sql +++ b/tests/queries/0_stateless/02151_lc_prefetch.sql @@ -3,5 +3,6 @@ drop table if exists tab_lc; CREATE TABLE tab_lc (x UInt64, y LowCardinality(String)) engine = MergeTree order by x SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into tab_lc select number, toString(number % 10) from numbers(20000000); optimize table tab_lc; +SET max_rows_to_read = '21M'; select count() from tab_lc where y == '0' settings local_filesystem_read_prefetch=1; drop table if exists tab_lc; diff --git a/tests/queries/0_stateless/02161_addressToLineWithInlines.sql b/tests/queries/0_stateless/02161_addressToLineWithInlines.sql index cf400ed34c5..d7ce133f38c 100644 --- a/tests/queries/0_stateless/02161_addressToLineWithInlines.sql +++ b/tests/queries/0_stateless/02161_addressToLineWithInlines.sql @@ -6,7 +6,7 @@ SELECT addressToLineWithInlines(1); -- { serverError FUNCTION_NOT_ALLOWED } SET allow_introspection_functions = 1; SET query_profiler_real_time_period_ns = 0; SET query_profiler_cpu_time_period_ns = 1000000; -SET log_queries = 1; +SET log_queries = 1, max_rows_to_read = 0; SELECT count() FROM numbers_mt(10000000000) SETTINGS log_comment='02161_test_case'; SET log_queries = 0; SET query_profiler_cpu_time_period_ns = 0; diff --git a/tests/queries/0_stateless/02164_clickhouse_local_interactive_exception.expect b/tests/queries/0_stateless/02164_clickhouse_local_interactive_exception.expect deleted file mode 100755 index add977c4fce..00000000000 --- a/tests/queries/0_stateless/02164_clickhouse_local_interactive_exception.expect +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/expect -f - -set basedir [file dirname $argv0] -set basename [file tail $argv0] -if {[info exists env(CLICKHOUSE_TMP)]} { - set CLICKHOUSE_TMP $env(CLICKHOUSE_TMP) -} else { - set CLICKHOUSE_TMP "." -} -exp_internal -f $CLICKHOUSE_TMP/$basename.debuglog 0 - -log_user 0 -set timeout 20 -match_max 100000 - -expect_after { - -i $any_spawn_id eof { exp_continue } - -i $any_spawn_id timeout { exit 1 } -} - -spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_LOCAL --disable_suggestion" - -expect ":) " -send -- "insert into table function null() format TSV some trash here 123 \n 456\r" -expect "CANNOT_PARSE_INPUT_ASSERTION_FAILED" -expect ":) " - -send -- "" -expect eof diff --git a/tests/queries/0_stateless/02164_clickhouse_local_interactive_exception.python b/tests/queries/0_stateless/02164_clickhouse_local_interactive_exception.python new file mode 100644 index 00000000000..4c2df9556a1 --- /dev/null +++ b/tests/queries/0_stateless/02164_clickhouse_local_interactive_exception.python @@ -0,0 +1,95 @@ +import pty +import os +import shlex +import time +import multiprocessing + +COMPLETION_TIMEOUT_SECONDS = 30 +DEBUG_LOG = os.path.join( + os.environ["CLICKHOUSE_TMP"], + os.path.basename(os.path.abspath(__file__)).strip(".python") + ".debuglog", +) + +STATE_MAP = { + -1: "process did not start", + 0: "all good", + 1: "process started and said ':)'", + 2: "prompt search was started", + 3: "prompt is missing", +} + + +def run_with_timeout(func, args, timeout): + for _ in range(5): + state = multiprocessing.Value("i", -1) + process = multiprocessing.Process( + target=func, args=args, kwargs={"state": state} + ) + process.start() + process.join(timeout) + + if state.value in (0, 3): + return + + if process.is_alive(): + process.terminate() + + if state.value == -1: + continue + + print(f"Timeout, state: {STATE_MAP[state.value]}") + return + + +def expect(text, master, debug_log_fd, output=""): + while not text in output: + output_b = os.read(master, 4096) + output += output_b.decode() + debug_log_fd.write(repr(output_b) + "\n") + debug_log_fd.flush() + + return output + + +def test_completion(program, argv, prompt, state=None): + shell_pid, master = pty.fork() + if shell_pid == 0: + os.execv(program, argv) + else: + try: + debug_log_fd = open(DEBUG_LOG, "a") + + expect(":)", master, debug_log_fd) + + state.value = 1 + os.write(master, bytes(prompt.encode())) + expect(prompt[:-10], master, debug_log_fd) + + time.sleep(0.01) + os.write(master, b"\r") + state.value = 2 + + output = expect("CANNOT_PARSE_INPUT_ASSERTION_FAILED", master, debug_log_fd) + expect(":)", master, debug_log_fd, output) + + print("OK") + state.value = 0 + finally: + os.close(master) + debug_log_fd.close() + + +if __name__ == "__main__": + clickhouse_local = os.environ["CLICKHOUSE_LOCAL"] + args = shlex.split(clickhouse_local) + args.append("--disable_suggestion") + args.append("--highlight=0") + run_with_timeout( + test_completion, + [ + args[0], + args, + "insert into table function null() format TSV some trash here 123 \n 456", + ], + COMPLETION_TIMEOUT_SECONDS, + ) diff --git a/tests/queries/0_stateless/02164_clickhouse_local_interactive_exception.reference b/tests/queries/0_stateless/02164_clickhouse_local_interactive_exception.reference index e69de29bb2d..d86bac9de59 100644 --- a/tests/queries/0_stateless/02164_clickhouse_local_interactive_exception.reference +++ b/tests/queries/0_stateless/02164_clickhouse_local_interactive_exception.reference @@ -0,0 +1 @@ +OK diff --git a/tests/queries/0_stateless/02177_issue_31009.sql b/tests/queries/0_stateless/02177_issue_31009.sql index f25df59f4b4..5c62b5a9c2f 100644 --- a/tests/queries/0_stateless/02177_issue_31009.sql +++ b/tests/queries/0_stateless/02177_issue_31009.sql @@ -8,6 +8,8 @@ DROP TABLE IF EXISTS right; CREATE TABLE left ( key UInt32, value String ) ENGINE = MergeTree ORDER BY key SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; CREATE TABLE right ( key UInt32, value String ) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; +SET max_rows_to_read = '50M'; + INSERT INTO left SELECT number, toString(number) FROM numbers(25367182); INSERT INTO right SELECT number, toString(number) FROM numbers(23124707); diff --git a/tests/queries/0_stateless/02177_issue_31009_pt2.sql.j2 b/tests/queries/0_stateless/02177_issue_31009_pt2.sql.j2 index 47940356302..7df77595347 100644 --- a/tests/queries/0_stateless/02177_issue_31009_pt2.sql.j2 +++ b/tests/queries/0_stateless/02177_issue_31009_pt2.sql.j2 @@ -1,4 +1,5 @@ --- Tags: long +-- Tags: long, no-flaky-check +-- It can be too long with ThreadFuzzer DROP TABLE IF EXISTS left; DROP TABLE IF EXISTS right; diff --git a/tests/queries/0_stateless/02210_processors_profile_log.reference b/tests/queries/0_stateless/02210_processors_profile_log.reference index 035bd9897ad..12ba17103da 100644 --- a/tests/queries/0_stateless/02210_processors_profile_log.reference +++ b/tests/queries/0_stateless/02210_processors_profile_log.reference @@ -6,6 +6,6 @@ ExpressionTransform ExpressionTransform 1 1 1 1 1 LazyOutputFormat 1 1 1 0 0 LimitsCheckingTransform 1 1 1 1 1 -NullSource 1 0 0 0 0 -NullSource 1 0 0 0 0 +NullSource 0 0 0 0 0 +NullSource 0 0 0 0 0 SourceFromSingleChunk 1 0 0 1 1 diff --git a/tests/queries/0_stateless/02210_processors_profile_log.sql b/tests/queries/0_stateless/02210_processors_profile_log.sql index 75e5bcbb585..a850f4312b3 100644 --- a/tests/queries/0_stateless/02210_processors_profile_log.sql +++ b/tests/queries/0_stateless/02210_processors_profile_log.sql @@ -20,8 +20,8 @@ SELECT -- SourceFromSingleChunk, that feed data to ExpressionTransform, -- will feed first block and then wait in PortFull. name = 'SourceFromSingleChunk', output_wait_elapsed_us >= 0.9e6 ? 1 : output_wait_elapsed_us, - -- NullSource/LazyOutputFormatLazyOutputFormat are the outputs - -- so they cannot starts to execute before sleep(1) will be executed. + -- LazyOutputFormatLazyOutputFormat is the output + -- so it cannot starts to execute before sleep(1) will be executed. input_wait_elapsed_us>=1e6 ? 1 : input_wait_elapsed_us) elapsed, input_rows, diff --git a/tests/queries/0_stateless/02210_toColumnTypeName_toLowCardinality_const.reference b/tests/queries/0_stateless/02210_toColumnTypeName_toLowCardinality_const.reference index 1e3d3a50562..e3978020431 100644 --- a/tests/queries/0_stateless/02210_toColumnTypeName_toLowCardinality_const.reference +++ b/tests/queries/0_stateless/02210_toColumnTypeName_toLowCardinality_const.reference @@ -1 +1 @@ -Const(ColumnLowCardinality) +Const(LowCardinality(UInt8)) diff --git a/tests/queries/0_stateless/02221_parallel_replicas_bug.sh b/tests/queries/0_stateless/02221_parallel_replicas_bug.sh index 3c44a2a7ba7..a382b3859f3 100755 --- a/tests/queries/0_stateless/02221_parallel_replicas_bug.sh +++ b/tests/queries/0_stateless/02221_parallel_replicas_bug.sh @@ -4,4 +4,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} --allow_experimental_parallel_reading_from_replicas=1 --parallel_replicas_for_non_replicated_merge_tree=1 -nm < "$CURDIR"/01099_parallel_distributed_insert_select.sql > /dev/null +${CLICKHOUSE_CLIENT} --allow_experimental_parallel_reading_from_replicas=1 --parallel_replicas_for_non_replicated_merge_tree=1 -m < "$CURDIR"/01099_parallel_distributed_insert_select.sql > /dev/null diff --git a/tests/queries/0_stateless/02221_system_zookeeper_unrestricted.sh b/tests/queries/0_stateless/02221_system_zookeeper_unrestricted.sh index db94c59d2de..e23a272a4e8 100755 --- a/tests/queries/0_stateless/02221_system_zookeeper_unrestricted.sh +++ b/tests/queries/0_stateless/02221_system_zookeeper_unrestricted.sh @@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS sample_table" ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS sample_table_2" -${CLICKHOUSE_CLIENT} -n -q" +${CLICKHOUSE_CLIENT} -q" CREATE TABLE sample_table ( key UInt64 ) @@ -16,7 +16,7 @@ ENGINE ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/02221_ ORDER BY tuple(); " -${CLICKHOUSE_CLIENT} -n -q" +${CLICKHOUSE_CLIENT} -q" CREATE TABLE sample_table_2 ( key UInt64 ) diff --git a/tests/queries/0_stateless/02221_system_zookeeper_unrestricted_like.sh b/tests/queries/0_stateless/02221_system_zookeeper_unrestricted_like.sh index c62ec14b340..6381d811d5d 100755 --- a/tests/queries/0_stateless/02221_system_zookeeper_unrestricted_like.sh +++ b/tests/queries/0_stateless/02221_system_zookeeper_unrestricted_like.sh @@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS sample_table;" ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS sample_table_2;" -${CLICKHOUSE_CLIENT} -n --query="CREATE TABLE sample_table ( +${CLICKHOUSE_CLIENT} --query="CREATE TABLE sample_table ( key UInt64 ) ENGINE ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/02221_system_zookeeper_unrestricted_like', '1') @@ -16,7 +16,7 @@ ORDER BY tuple(); DROP TABLE IF EXISTS sample_table SYNC;" -${CLICKHOUSE_CLIENT} -n --query "CREATE TABLE sample_table_2 ( +${CLICKHOUSE_CLIENT} --query "CREATE TABLE sample_table_2 ( key UInt64 ) ENGINE ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/02221_system_zookeeper_unrestricted_like_2', '1') diff --git a/tests/queries/0_stateless/02225_parallel_distributed_insert_select_view.sh b/tests/queries/0_stateless/02225_parallel_distributed_insert_select_view.sh index 376a49fd820..63111cc32e4 100755 --- a/tests/queries/0_stateless/02225_parallel_distributed_insert_select_view.sh +++ b/tests/queries/0_stateless/02225_parallel_distributed_insert_select_view.sh @@ -6,7 +6,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " drop table if exists dst_02225; drop table if exists src_02225; create table dst_02225 (key Int) engine=Memory(); @@ -14,7 +14,7 @@ create table src_02225 (key Int) engine=Memory(); insert into src_02225 values (1); " -$CLICKHOUSE_CLIENT --param_database=$CLICKHOUSE_DATABASE -nm -q " +$CLICKHOUSE_CLIENT --param_database=$CLICKHOUSE_DATABASE -m -q " truncate table dst_02225; insert into function remote('127.{1,2}', currentDatabase(), dst_02225, key) select * from remote('127.{1,2}', view(select * from {database:Identifier}.src_02225), key) @@ -29,7 +29,7 @@ settings parallel_distributed_insert_select=2, max_distributed_depth=1; select * from dst_02225; " -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " drop table src_02225; drop table dst_02225; " diff --git a/tests/queries/0_stateless/02226_analyzer_or_like_combine.sql b/tests/queries/0_stateless/02226_analyzer_or_like_combine.sql index b23e5640b8f..0c150249aeb 100644 --- a/tests/queries/0_stateless/02226_analyzer_or_like_combine.sql +++ b/tests/queries/0_stateless/02226_analyzer_or_like_combine.sql @@ -1,3 +1,5 @@ +SET allow_hyperscan = 1, max_hyperscan_regexp_length = 0, max_hyperscan_regexp_total_length = 0; + EXPLAIN SYNTAX SELECT materialize('Привет, World') AS s WHERE (s LIKE 'hell%') OR (s ILIKE '%привет%') OR (s ILIKE 'world%') SETTINGS optimize_or_like_chain = 0; EXPLAIN QUERY TREE run_passes=1 SELECT materialize('Привет, World') AS s WHERE (s LIKE 'hell%') OR (s ILIKE '%привет%') OR (s ILIKE 'world%') SETTINGS optimize_or_like_chain = 0, enable_analyzer = 1; EXPLAIN SYNTAX SELECT materialize('Привет, World') AS s WHERE (s LIKE 'hell%') OR (s ILIKE '%привет%') OR (s ILIKE 'world%') SETTINGS optimize_or_like_chain = 1; diff --git a/tests/queries/0_stateless/02226_parallel_reading_from_replicas_benchmark.sh b/tests/queries/0_stateless/02226_parallel_reading_from_replicas_benchmark.sh index bc90f4b2c11..177b373641f 100755 --- a/tests/queries/0_stateless/02226_parallel_reading_from_replicas_benchmark.sh +++ b/tests/queries/0_stateless/02226_parallel_reading_from_replicas_benchmark.sh @@ -4,7 +4,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " drop table if exists data_02226; create table data_02226 (key Int) engine=MergeTree() order by key as select * from numbers(1); @@ -24,7 +24,7 @@ opts=( $CLICKHOUSE_BENCHMARK --query "select * from remote('127.1', $CLICKHOUSE_DATABASE, data_02226)" "${opts[@]}" >& /dev/null ret=$? -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " drop table data_02226; " diff --git a/tests/queries/0_stateless/02228_unquoted_dates_in_csv_schema_inference.reference b/tests/queries/0_stateless/02228_unquoted_dates_in_csv_schema_inference.reference index be82d744a3b..56293ca0e5d 100644 --- a/tests/queries/0_stateless/02228_unquoted_dates_in_csv_schema_inference.reference +++ b/tests/queries/0_stateless/02228_unquoted_dates_in_csv_schema_inference.reference @@ -1 +1 @@ -c1 Nullable(DateTime64(9)) +c1 Nullable(DateTime) diff --git a/tests/queries/0_stateless/02232_allow_only_replicated_engine.sh b/tests/queries/0_stateless/02232_allow_only_replicated_engine.sh index d1a3825d286..e47a3033681 100755 --- a/tests/queries/0_stateless/02232_allow_only_replicated_engine.sh +++ b/tests/queries/0_stateless/02232_allow_only_replicated_engine.sh @@ -12,9 +12,9 @@ ${CLICKHOUSE_CLIENT} -q "GRANT CREATE TABLE ON ${CLICKHOUSE_DATABASE}_db.* TO us ${CLICKHOUSE_CLIENT} -q "GRANT TABLE ENGINE ON Memory, TABLE ENGINE ON MergeTree, TABLE ENGINE ON ReplicatedMergeTree TO user_${CLICKHOUSE_DATABASE}" ${CLICKHOUSE_CLIENT} -q "CREATE DATABASE ${CLICKHOUSE_DATABASE}_db engine = Replicated('/clickhouse/databases/${CLICKHOUSE_TEST_ZOOKEEPER_PREFIX}/${CLICKHOUSE_DATABASE}_db', '{shard}', '{replica}')" ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --user "user_${CLICKHOUSE_DATABASE}" --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_memory (x UInt32) engine = Memory;" -${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --user "user_${CLICKHOUSE_DATABASE}" -n --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_mt (x UInt32) engine = MergeTree order by x;" 2>&1 | grep -o "Only tables with a Replicated engine" -${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none -n --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_mt (x UInt32) engine = MergeTree order by x;" -${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --user "user_${CLICKHOUSE_DATABASE}" -n --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_rmt (x UInt32) engine = ReplicatedMergeTree order by x;" +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --user "user_${CLICKHOUSE_DATABASE}" --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_mt (x UInt32) engine = MergeTree order by x;" 2>&1 | grep -o "Only tables with a Replicated engine" +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_mt (x UInt32) engine = MergeTree order by x;" +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --user "user_${CLICKHOUSE_DATABASE}" --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_rmt (x UInt32) engine = ReplicatedMergeTree order by x;" ${CLICKHOUSE_CLIENT} --query "DROP DATABASE ${CLICKHOUSE_DATABASE}_db" ${CLICKHOUSE_CLIENT} -q "DROP USER user_${CLICKHOUSE_DATABASE}" diff --git a/tests/queries/0_stateless/02234_cast_to_ip_address.reference b/tests/queries/0_stateless/02234_cast_to_ip_address.reference index fa9c6bd0f94..b9f0a49ec4d 100644 --- a/tests/queries/0_stateless/02234_cast_to_ip_address.reference +++ b/tests/queries/0_stateless/02234_cast_to_ip_address.reference @@ -37,7 +37,7 @@ IPv6 functions ::ffff:127.0.0.1 :: \N -100000000 +20000000 -- ::ffff:127.0.0.1 -- diff --git a/tests/queries/0_stateless/02234_cast_to_ip_address.sql b/tests/queries/0_stateless/02234_cast_to_ip_address.sql index 28f1afff57f..c851cfde927 100644 --- a/tests/queries/0_stateless/02234_cast_to_ip_address.sql +++ b/tests/queries/0_stateless/02234_cast_to_ip_address.sql @@ -67,11 +67,11 @@ SELECT toIPv6('::.1.2.3'); --{serverError CANNOT_PARSE_IPV6} SELECT toIPv6OrDefault('::.1.2.3'); SELECT toIPv6OrNull('::.1.2.3'); -SELECT count() FROM numbers_mt(100000000) WHERE NOT ignore(toIPv6OrZero(randomString(8))); +SELECT count() FROM numbers_mt(20000000) WHERE NOT ignore(toIPv6OrZero(randomString(8))); SELECT '--'; -SELECT cast('test' , 'IPv6'); --{serverError CANNOT_PARSE_IPV6} +SELECT cast('test' , 'IPv6'); -- { serverError CANNOT_PARSE_IPV6 } SELECT cast('::ffff:127.0.0.1', 'IPv6'); SELECT '--'; diff --git a/tests/queries/0_stateless/02242_subcolumns_sizes.sql b/tests/queries/0_stateless/02242_subcolumns_sizes.sql index d29241131d3..1232e5fc1c2 100644 --- a/tests/queries/0_stateless/02242_subcolumns_sizes.sql +++ b/tests/queries/0_stateless/02242_subcolumns_sizes.sql @@ -4,7 +4,7 @@ DROP TABLE IF EXISTS t_subcolumns_sizes; SET allow_experimental_object_type = 1; -CREATE TABLE t_subcolumns_sizes (id UInt64, arr Array(UInt64), n Nullable(String), d JSON) +CREATE TABLE t_subcolumns_sizes (id UInt64, arr Array(UInt64), n Nullable(String), d Object('json')) ENGINE = MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = 0; diff --git a/tests/queries/0_stateless/02246_flatten_tuple.sql b/tests/queries/0_stateless/02246_flatten_tuple.sql index 139f7a621ef..d09e740ee0c 100644 --- a/tests/queries/0_stateless/02246_flatten_tuple.sql +++ b/tests/queries/0_stateless/02246_flatten_tuple.sql @@ -12,7 +12,7 @@ INSERT INTO t_flatten_tuple VALUES (([(1, 'a'), (2, 'b')], 3, ('c', 4))); SELECT flattenTuple(t) AS ft, toTypeName(ft) FROM t_flatten_tuple; SET allow_experimental_object_type = 1; -CREATE TABLE t_flatten_object(data JSON) ENGINE = Memory; +CREATE TABLE t_flatten_object(data Object('json')) ENGINE = Memory; INSERT INTO t_flatten_object VALUES ('{"id": 1, "obj": {"k1": 1, "k2": {"k3": 2, "k4": [{"k5": 3}, {"k5": 4}]}}, "s": "foo"}'); INSERT INTO t_flatten_object VALUES ('{"id": 2, "obj": {"k2": {"k3": "str", "k4": [{"k6": 55}]}, "some": 42}, "s": "bar"}'); diff --git a/tests/queries/0_stateless/02250_ON_CLUSTER_grant.sh b/tests/queries/0_stateless/02250_ON_CLUSTER_grant.sh index 66417e9694a..09f9c0c8a98 100755 --- a/tests/queries/0_stateless/02250_ON_CLUSTER_grant.sh +++ b/tests/queries/0_stateless/02250_ON_CLUSTER_grant.sh @@ -6,7 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) function cleanup() { - $CLICKHOUSE_CLIENT -nmq " + $CLICKHOUSE_CLIENT -mq " DROP USER IF EXISTS with_on_cluster_$CLICKHOUSE_TEST_UNIQUE_NAME; DROP USER IF EXISTS without_on_cluster_$CLICKHOUSE_TEST_UNIQUE_NAME; DROP DATABASE IF EXISTS db_with_on_cluster_$CLICKHOUSE_TEST_UNIQUE_NAME; @@ -15,7 +15,7 @@ function cleanup() cleanup trap cleanup EXIT -$CLICKHOUSE_CLIENT -nmq " +$CLICKHOUSE_CLIENT -mq " CREATE USER with_on_cluster_$CLICKHOUSE_TEST_UNIQUE_NAME; CREATE USER without_on_cluster_$CLICKHOUSE_TEST_UNIQUE_NAME; diff --git a/tests/queries/0_stateless/02262_column_ttl.sh b/tests/queries/0_stateless/02262_column_ttl.sh index b5e29c9b2a1..c620d3b6d9c 100755 --- a/tests/queries/0_stateless/02262_column_ttl.sh +++ b/tests/queries/0_stateless/02262_column_ttl.sh @@ -14,7 +14,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # note, that this should be written in .sh since we need $CLICKHOUSE_DATABASE # not 'default' to catch text_log -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " drop table if exists ttl_02262; drop table if exists this_text_log; @@ -31,7 +31,7 @@ $CLICKHOUSE_CLIENT -nm -q " ttl_02262_uuid=$($CLICKHOUSE_CLIENT -q "select uuid from system.tables where database = '$CLICKHOUSE_DATABASE' and name = 'ttl_02262'") -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " -- OPTIMIZE TABLE x FINAL will be done in background -- attach to it's log, via table UUID in query_id (see merger/mutator code). create materialized view this_text_log engine=Memory() as diff --git a/tests/queries/0_stateless/02286_parallel_final.sh b/tests/queries/0_stateless/02286_parallel_final.sh index 0ac510208f3..47dfad42e11 100755 --- a/tests/queries/0_stateless/02286_parallel_final.sh +++ b/tests/queries/0_stateless/02286_parallel_final.sh @@ -9,7 +9,7 @@ echo "Test intersecting ranges" test_random_values() { layers=$1 - $CLICKHOUSE_CLIENT -n -q " + $CLICKHOUSE_CLIENT -q " drop table if exists tbl_8parts_${layers}granules_rnd; create table tbl_8parts_${layers}granules_rnd (key1 UInt32, sign Int8) engine = CollapsingMergeTree(sign) order by (key1) partition by (key1 % 8); insert into tbl_8parts_${layers}granules_rnd select number, 1 from numbers_mt($((layers * 8 * 8192))); @@ -29,7 +29,7 @@ echo "Test non intersecting ranges" test_sequential_values() { layers=$1 - $CLICKHOUSE_CLIENT -n -q " + $CLICKHOUSE_CLIENT -q " drop table if exists tbl_8parts_${layers}granules_seq; create table tbl_8parts_${layers}granules_seq (key1 UInt32, sign Int8) engine = CollapsingMergeTree(sign) order by (key1) partition by (key1 / $((layers * 8192)))::UInt64; insert into tbl_8parts_${layers}granules_seq select number, 1 from numbers_mt($((layers * 8 * 8192))); diff --git a/tests/queries/0_stateless/02286_tuple_numeric_identifier.reference b/tests/queries/0_stateless/02286_tuple_numeric_identifier.reference index 21348493d1d..916cdaf83cd 100644 --- a/tests/queries/0_stateless/02286_tuple_numeric_identifier.reference +++ b/tests/queries/0_stateless/02286_tuple_numeric_identifier.reference @@ -1,4 +1,4 @@ -CREATE TABLE default.t_tuple_numeric\n(\n `t` Tuple(`1` Tuple(`2` Int32, `3` Int32), `4` Int32)\n)\nENGINE = Memory +CREATE TABLE default.t_tuple_numeric\n(\n `t` Tuple(\n `1` Tuple(\n `2` Int32,\n `3` Int32),\n `4` Int32)\n)\nENGINE = Memory {"t":{"1":{"2":2,"3":3},"4":4}} 2 3 4 2 3 4 diff --git a/tests/queries/0_stateless/02286_tuple_numeric_identifier.sql b/tests/queries/0_stateless/02286_tuple_numeric_identifier.sql index 151ff275f7b..8c26b93aedd 100644 --- a/tests/queries/0_stateless/02286_tuple_numeric_identifier.sql +++ b/tests/queries/0_stateless/02286_tuple_numeric_identifier.sql @@ -28,7 +28,7 @@ SELECT `t`.`1`.`1`, `t`.`1`.`2`, `t`.`2` FROM t_tuple_numeric; DROP TABLE t_tuple_numeric; SET allow_experimental_object_type = 1; -CREATE TABLE t_tuple_numeric (t JSON) ENGINE = Memory; +CREATE TABLE t_tuple_numeric (t Object('json')) ENGINE = Memory; INSERT INTO t_tuple_numeric FORMAT JSONEachRow {"t":{"1":{"2":2,"3":3},"4":4}} SELECT toTypeName(t) FROM t_tuple_numeric LIMIT 1; diff --git a/tests/queries/0_stateless/02293_http_header_full_summary_without_progress.sh b/tests/queries/0_stateless/02293_http_header_full_summary_without_progress.sh index a08928a773c..0d8a568fef0 100755 --- a/tests/queries/0_stateless/02293_http_header_full_summary_without_progress.sh +++ b/tests/queries/0_stateless/02293_http_header_full_summary_without_progress.sh @@ -6,8 +6,17 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh +# Sanity check to ensure that the server is up and running +for _ in {1..10}; do + echo 'SELECT 1' | ${CLICKHOUSE_CURL_COMMAND} -s "${CLICKHOUSE_URL}" --data-binary @- > /dev/null + if [ $? -eq 0 ]; then + break + fi + sleep 1 +done + CURL_OUTPUT=$(echo 'SELECT 1 + sleepEachRow(0.00002) FROM numbers(100000)' | \ - ${CLICKHOUSE_CURL_COMMAND} -vsS "${CLICKHOUSE_URL}&wait_end_of_query=1&send_progress_in_http_headers=0&max_execution_time=1" --data-binary @- 2>&1) + ${CLICKHOUSE_CURL_COMMAND} --max-time 3 -vsS "${CLICKHOUSE_URL}&wait_end_of_query=1&send_progress_in_http_headers=0&max_execution_time=1" --data-binary @- 2>&1) READ_ROWS=$(echo "${CURL_OUTPUT}" | \ grep 'X-ClickHouse-Summary' | \ @@ -20,6 +29,7 @@ then echo "Read rows in summary is not zero" else echo "Read rows in summary is zero!" + echo "${CURL_OUTPUT}" fi # Check that the response code is correct too diff --git a/tests/queries/0_stateless/02293_ttest_large_samples.sql b/tests/queries/0_stateless/02293_ttest_large_samples.sql index 14baa3fddfe..b4687541360 100644 --- a/tests/queries/0_stateless/02293_ttest_large_samples.sql +++ b/tests/queries/0_stateless/02293_ttest_large_samples.sql @@ -1,3 +1,5 @@ +-- Tags: long + SELECT roundBankers(result.1, 5), roundBankers(result.2, 5) FROM ( SELECT studentTTest(sample, variant) as result @@ -15,6 +17,8 @@ SELECT FROM system.numbers limit 500000)); +SET max_rows_to_read = 0; + SELECT roundBankers(result.1, 5), roundBankers(result.2, 5 ) FROM ( SELECT studentTTest(sample, variant) as result diff --git a/tests/queries/0_stateless/02294_floating_point_second_in_settings.sh b/tests/queries/0_stateless/02294_floating_point_second_in_settings.sh index 7a18b8fea29..27dbd3e3de6 100755 --- a/tests/queries/0_stateless/02294_floating_point_second_in_settings.sh +++ b/tests/queries/0_stateless/02294_floating_point_second_in_settings.sh @@ -23,16 +23,16 @@ function check_output() { # TCP CLIENT echo "TCP CLIENT" -OUTPUT=$($CLICKHOUSE_CLIENT --max_execution_time $MAX_TIMEOUT -q "SELECT count() FROM system.numbers" 2>&1 || true) +OUTPUT=$($CLICKHOUSE_CLIENT --max_rows_to_read 0 --max_execution_time $MAX_TIMEOUT -q "SELECT count() FROM system.numbers" 2>&1 || true) check_output "${OUTPUT}" echo "TCP CLIENT WITH SETTINGS IN QUERY" -OUTPUT=$($CLICKHOUSE_CLIENT -q "SELECT count() FROM system.numbers SETTINGS max_execution_time=$MAX_TIMEOUT" 2>&1 || true) +OUTPUT=$($CLICKHOUSE_CLIENT --max_rows_to_read 0 -q "SELECT count() FROM system.numbers SETTINGS max_execution_time=$MAX_TIMEOUT" 2>&1 || true) check_output "${OUTPUT}" # HTTP CLIENT echo "HTTP CLIENT" -OUTPUT=$(${CLICKHOUSE_CURL_COMMAND} -q -sS "$CLICKHOUSE_URL&max_execution_time=$MAX_TIMEOUT" -d \ +OUTPUT=$(${CLICKHOUSE_CURL_COMMAND} -q -sS "$CLICKHOUSE_URL&max_execution_time=${MAX_TIMEOUT}&max_rows_to_read=0" -d \ "SELECT count() FROM system.numbers" || true) check_output "${OUTPUT}" diff --git a/tests/queries/0_stateless/02313_dump_column_structure_low_cardinality.reference b/tests/queries/0_stateless/02313_dump_column_structure_low_cardinality.reference index fa7f1799c31..6b1e4743867 100644 --- a/tests/queries/0_stateless/02313_dump_column_structure_low_cardinality.reference +++ b/tests/queries/0_stateless/02313_dump_column_structure_low_cardinality.reference @@ -1 +1 @@ -Array(LowCardinality(String)), Const(size = 1, Array(size = 1, UInt64(size = 1), ColumnLowCardinality(size = 2, UInt8(size = 2), ColumnUnique(size = 3, String(size = 3))))) +Array(LowCardinality(String)), Const(size = 1, Array(size = 1, UInt64(size = 1), LowCardinality(size = 2, UInt8(size = 2), Unique(size = 3, String(size = 3))))) diff --git a/tests/queries/0_stateless/02317_distinct_in_order_optimization_explain.sh b/tests/queries/0_stateless/02317_distinct_in_order_optimization_explain.sh index bd7e6be3987..953485c3a1f 100755 --- a/tests/queries/0_stateless/02317_distinct_in_order_optimization_explain.sh +++ b/tests/queries/0_stateless/02317_distinct_in_order_optimization_explain.sh @@ -23,99 +23,99 @@ $CLICKHOUSE_CLIENT -q "insert into distinct_in_order_explain select number % num $CLICKHOUSE_CLIENT -q "select '-- disable optimize_distinct_in_order'" $CLICKHOUSE_CLIENT -q "select '-- distinct all primary key columns -> ordinary distinct'" -$CLICKHOUSE_CLIENT -nq "$DISABLE_OPTIMIZATION;explain pipeline select distinct * from distinct_in_order_explain" | eval $FIND_DISTINCT +$CLICKHOUSE_CLIENT -q "$DISABLE_OPTIMIZATION;explain pipeline select distinct * from distinct_in_order_explain" | eval $FIND_DISTINCT $CLICKHOUSE_CLIENT -q "select '-- enable optimize_distinct_in_order'" $CLICKHOUSE_CLIENT -q "select '-- distinct with all primary key columns -> pre-distinct optimization only'" -$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct * from distinct_in_order_explain" | eval $FIND_DISTINCT +$CLICKHOUSE_CLIENT -q "$ENABLE_OPTIMIZATION;explain pipeline select distinct * from distinct_in_order_explain" | eval $FIND_DISTINCT $CLICKHOUSE_CLIENT -q "select '-- distinct with primary key prefix -> pre-distinct optimization only'" -$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, c from distinct_in_order_explain" | eval $FIND_DISTINCT +$CLICKHOUSE_CLIENT -q "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, c from distinct_in_order_explain" | eval $FIND_DISTINCT $CLICKHOUSE_CLIENT -q "select '-- distinct with primary key prefix and order by column in distinct -> pre-distinct and final distinct optimization'" -$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, c from distinct_in_order_explain order by c" | eval $FIND_DISTINCT +$CLICKHOUSE_CLIENT -q "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, c from distinct_in_order_explain order by c" | eval $FIND_DISTINCT $CLICKHOUSE_CLIENT -q "select '-- distinct with primary key prefix and order by the same columns -> pre-distinct and final distinct optimization'" -$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, b from distinct_in_order_explain order by a, b" | eval $FIND_DISTINCT +$CLICKHOUSE_CLIENT -q "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, b from distinct_in_order_explain order by a, b" | eval $FIND_DISTINCT $CLICKHOUSE_CLIENT -q "select '-- distinct with primary key prefix and order by columns are prefix of distinct columns -> pre-distinct and final distinct optimization'" -$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, b from distinct_in_order_explain order by a" | eval $FIND_DISTINCT +$CLICKHOUSE_CLIENT -q "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, b from distinct_in_order_explain order by a" | eval $FIND_DISTINCT $CLICKHOUSE_CLIENT -q "select '-- distinct with primary key prefix and order by column in distinct but non-primary key prefix -> pre-distinct and final distinct optimization'" -$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, b, c from distinct_in_order_explain order by c" | eval $FIND_DISTINCT +$CLICKHOUSE_CLIENT -q "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, b, c from distinct_in_order_explain order by c" | eval $FIND_DISTINCT $CLICKHOUSE_CLIENT -q "select '-- distinct with primary key prefix and order by column _not_ in distinct -> pre-distinct optimization only'" -$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, c from distinct_in_order_explain order by b" | eval $FIND_DISTINCT +$CLICKHOUSE_CLIENT -q "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, c from distinct_in_order_explain order by b" | eval $FIND_DISTINCT $CLICKHOUSE_CLIENT -q "select '-- distinct with non-primary key prefix -> ordinary distinct'" -$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct b, c from distinct_in_order_explain" | eval $FIND_DISTINCT +$CLICKHOUSE_CLIENT -q "$ENABLE_OPTIMIZATION;explain pipeline select distinct b, c from distinct_in_order_explain" | eval $FIND_DISTINCT $CLICKHOUSE_CLIENT -q "select '-- distinct with non-primary key prefix and order by column in distinct -> final distinct optimization only'" -$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct b, c from distinct_in_order_explain order by b" | eval $FIND_DISTINCT +$CLICKHOUSE_CLIENT -q "$ENABLE_OPTIMIZATION;explain pipeline select distinct b, c from distinct_in_order_explain order by b" | eval $FIND_DISTINCT $CLICKHOUSE_CLIENT -q "select '-- distinct with non-primary key prefix and order by column _not_ in distinct -> ordinary distinct'" -$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct b, c from distinct_in_order_explain order by a" | eval $FIND_DISTINCT +$CLICKHOUSE_CLIENT -q "$ENABLE_OPTIMIZATION;explain pipeline select distinct b, c from distinct_in_order_explain order by a" | eval $FIND_DISTINCT $CLICKHOUSE_CLIENT -q "select '-- distinct with non-primary key prefix and order by _const_ column in distinct -> ordinary distinct'" -$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct b, 1 as x from distinct_in_order_explain order by x" | eval $FIND_DISTINCT +$CLICKHOUSE_CLIENT -q "$ENABLE_OPTIMIZATION;explain pipeline select distinct b, 1 as x from distinct_in_order_explain order by x" | eval $FIND_DISTINCT echo "-- Check reading in order for distinct" echo "-- disabled, distinct columns match sorting key" -$CLICKHOUSE_CLIENT --max_threads=0 -nq "$DISABLE_OPTIMIZATION;explain pipeline select distinct a, b from distinct_in_order_explain" | eval $FIND_READING_DEFAULT +$CLICKHOUSE_CLIENT --max_threads=0 -q "$DISABLE_OPTIMIZATION;explain pipeline select distinct a, b from distinct_in_order_explain" | eval $FIND_READING_DEFAULT echo "-- enabled, distinct columns match sorting key" # read_in_order_two_level_merge_threshold is set here to avoid repeating MergeTreeInOrder in output -$CLICKHOUSE_CLIENT --read_in_order_two_level_merge_threshold=2 -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, b from distinct_in_order_explain" | eval $FIND_READING_IN_ORDER +$CLICKHOUSE_CLIENT --read_in_order_two_level_merge_threshold=2 -q "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, b from distinct_in_order_explain" | eval $FIND_READING_IN_ORDER echo "-- enabled, distinct columns form prefix of sorting key" -$CLICKHOUSE_CLIENT --read_in_order_two_level_merge_threshold=2 -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, b from distinct_in_order_explain" | eval $FIND_READING_IN_ORDER +$CLICKHOUSE_CLIENT --read_in_order_two_level_merge_threshold=2 -q "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, b from distinct_in_order_explain" | eval $FIND_READING_IN_ORDER echo "-- enabled, distinct columns DON't form prefix of sorting key" -$CLICKHOUSE_CLIENT --max_threads=0 -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct b from distinct_in_order_explain" | eval $FIND_READING_DEFAULT +$CLICKHOUSE_CLIENT --max_threads=0 -q "$ENABLE_OPTIMIZATION;explain pipeline select distinct b from distinct_in_order_explain" | eval $FIND_READING_DEFAULT echo "-- enabled, distinct columns contains constant columns, non-const columns form prefix of sorting key" -$CLICKHOUSE_CLIENT --read_in_order_two_level_merge_threshold=2 -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct 1, a from distinct_in_order_explain" | eval $FIND_READING_IN_ORDER +$CLICKHOUSE_CLIENT --read_in_order_two_level_merge_threshold=2 -q "$ENABLE_OPTIMIZATION;explain pipeline select distinct 1, a from distinct_in_order_explain" | eval $FIND_READING_IN_ORDER echo "-- enabled, distinct columns contains constant columns, non-const columns match prefix of sorting key" -$CLICKHOUSE_CLIENT --read_in_order_two_level_merge_threshold=2 -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct 1, b, a from distinct_in_order_explain" | eval $FIND_READING_IN_ORDER +$CLICKHOUSE_CLIENT --read_in_order_two_level_merge_threshold=2 -q "$ENABLE_OPTIMIZATION;explain pipeline select distinct 1, b, a from distinct_in_order_explain" | eval $FIND_READING_IN_ORDER echo "-- enabled, only part of distinct columns form prefix of sorting key" -$CLICKHOUSE_CLIENT --max_threads=0 -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, c from distinct_in_order_explain" | eval $FIND_READING_DEFAULT +$CLICKHOUSE_CLIENT --max_threads=0 -q "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, c from distinct_in_order_explain" | eval $FIND_READING_DEFAULT echo "=== disable new analyzer ===" DISABLE_ANALYZER="set enable_analyzer=0" echo "-- enabled, check that sorting properties are propagated from ReadFromMergeTree till preliminary distinct" -$CLICKHOUSE_CLIENT -nq "$DISABLE_ANALYZER;$ENABLE_OPTIMIZATION;explain plan sorting=1 select distinct b, a from distinct_in_order_explain where a > 0" | eval $FIND_SORTING_PROPERTIES +$CLICKHOUSE_CLIENT -q "$DISABLE_ANALYZER;$ENABLE_OPTIMIZATION;explain plan sorting=1 select distinct b, a from distinct_in_order_explain where a > 0" | eval $FIND_SORTING_PROPERTIES echo "-- check that reading in order optimization for ORDER BY and DISTINCT applied correctly in the same query" ENABLE_READ_IN_ORDER="set optimize_read_in_order=1" echo "-- disabled, check that sorting description for ReadFromMergeTree match ORDER BY columns" -$CLICKHOUSE_CLIENT -nq "$DISABLE_ANALYZER;$DISABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a" | eval $FIND_SORTING_PROPERTIES +$CLICKHOUSE_CLIENT -q "$DISABLE_ANALYZER;$DISABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a" | eval $FIND_SORTING_PROPERTIES echo "-- enabled, check that ReadFromMergeTree sorting description is overwritten by DISTINCT optimization i.e. it contains columns from DISTINCT clause" -$CLICKHOUSE_CLIENT -nq "$DISABLE_ANALYZER;$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a" | eval $FIND_SORTING_PROPERTIES +$CLICKHOUSE_CLIENT -q "$DISABLE_ANALYZER;$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a" | eval $FIND_SORTING_PROPERTIES echo "-- enabled, check that ReadFromMergeTree sorting description is overwritten by DISTINCT optimization, but direction used from ORDER BY clause" -$CLICKHOUSE_CLIENT -nq "$DISABLE_ANALYZER;$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a DESC" | eval $FIND_SORTING_PROPERTIES +$CLICKHOUSE_CLIENT -q "$DISABLE_ANALYZER;$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a DESC" | eval $FIND_SORTING_PROPERTIES echo "-- enabled, check that ReadFromMergeTree sorting description is NOT overwritten by DISTINCT optimization (1), - it contains columns from ORDER BY clause" -$CLICKHOUSE_CLIENT -nq "$DISABLE_ANALYZER;$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct a from distinct_in_order_explain order by a, b" | eval $FIND_SORTING_PROPERTIES +$CLICKHOUSE_CLIENT -q "$DISABLE_ANALYZER;$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct a from distinct_in_order_explain order by a, b" | eval $FIND_SORTING_PROPERTIES echo "-- enabled, check that ReadFromMergeTree sorting description is NOT overwritten by DISTINCT optimization (2), - direction used from ORDER BY clause" -$CLICKHOUSE_CLIENT -nq "$DISABLE_ANALYZER;$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a DESC, b DESC" | eval $FIND_SORTING_PROPERTIES +$CLICKHOUSE_CLIENT -q "$DISABLE_ANALYZER;$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a DESC, b DESC" | eval $FIND_SORTING_PROPERTIES echo "-- enabled, check that disabling other 'read in order' optimizations do not disable distinct in order optimization" -$CLICKHOUSE_CLIENT -nq "$DISABLE_ANALYZER;$ENABLE_OPTIMIZATION;set optimize_read_in_order=0;set optimize_aggregation_in_order=0;set optimize_read_in_window_order=0;explain plan sorting=1 select distinct a,b from distinct_in_order_explain" | eval $FIND_SORTING_PROPERTIES +$CLICKHOUSE_CLIENT -q "$DISABLE_ANALYZER;$ENABLE_OPTIMIZATION;set optimize_read_in_order=0;set optimize_aggregation_in_order=0;set optimize_read_in_window_order=0;explain plan sorting=1 select distinct a,b from distinct_in_order_explain" | eval $FIND_SORTING_PROPERTIES echo "=== enable new analyzer ===" ENABLE_ANALYZER="set enable_analyzer=1" echo "-- enabled, check that sorting properties are propagated from ReadFromMergeTree till preliminary distinct" -$CLICKHOUSE_CLIENT -nq "$ENABLE_ANALYZER;$ENABLE_OPTIMIZATION;explain plan sorting=1 select distinct b, a from distinct_in_order_explain where a > 0 settings optimize_move_to_prewhere=1" | eval $FIND_SORTING_PROPERTIES +$CLICKHOUSE_CLIENT -q "$ENABLE_ANALYZER;$ENABLE_OPTIMIZATION;explain plan sorting=1 select distinct b, a from distinct_in_order_explain where a > 0 settings optimize_move_to_prewhere=1" | eval $FIND_SORTING_PROPERTIES echo "-- disabled, check that sorting description for ReadFromMergeTree match ORDER BY columns" -$CLICKHOUSE_CLIENT -nq "$ENABLE_ANALYZER;$DISABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a" | eval $FIND_SORTING_PROPERTIES +$CLICKHOUSE_CLIENT -q "$ENABLE_ANALYZER;$DISABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a" | eval $FIND_SORTING_PROPERTIES echo "-- enabled, check that ReadFromMergeTree sorting description is overwritten by DISTINCT optimization i.e. it contains columns from DISTINCT clause" -$CLICKHOUSE_CLIENT -nq "$ENABLE_ANALYZER;$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a" | eval $FIND_SORTING_PROPERTIES +$CLICKHOUSE_CLIENT -q "$ENABLE_ANALYZER;$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a" | eval $FIND_SORTING_PROPERTIES echo "-- enabled, check that ReadFromMergeTree sorting description is overwritten by DISTINCT optimization, but direction used from ORDER BY clause" -$CLICKHOUSE_CLIENT -nq "$ENABLE_ANALYZER;$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a DESC" | eval $FIND_SORTING_PROPERTIES +$CLICKHOUSE_CLIENT -q "$ENABLE_ANALYZER;$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a DESC" | eval $FIND_SORTING_PROPERTIES echo "-- enabled, check that ReadFromMergeTree sorting description is NOT overwritten by DISTINCT optimization (1), - it contains columns from ORDER BY clause" -$CLICKHOUSE_CLIENT -nq "$ENABLE_ANALYZER;$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct a from distinct_in_order_explain order by a, b" | eval $FIND_SORTING_PROPERTIES +$CLICKHOUSE_CLIENT -q "$ENABLE_ANALYZER;$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct a from distinct_in_order_explain order by a, b" | eval $FIND_SORTING_PROPERTIES echo "-- enabled, check that ReadFromMergeTree sorting description is NOT overwritten by DISTINCT optimization (2), - direction used from ORDER BY clause" -$CLICKHOUSE_CLIENT -nq "$ENABLE_ANALYZER;$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a DESC, b DESC" | eval $FIND_SORTING_PROPERTIES +$CLICKHOUSE_CLIENT -q "$ENABLE_ANALYZER;$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a DESC, b DESC" | eval $FIND_SORTING_PROPERTIES echo "-- enabled, check that disabling other 'read in order' optimizations do not disable distinct in order optimization" -$CLICKHOUSE_CLIENT -nq "$ENABLE_ANALYZER;$ENABLE_OPTIMIZATION;set optimize_read_in_order=0;set optimize_aggregation_in_order=0;set optimize_read_in_window_order=0;explain plan sorting=1 select distinct a,b from distinct_in_order_explain" | eval $FIND_SORTING_PROPERTIES +$CLICKHOUSE_CLIENT -q "$ENABLE_ANALYZER;$ENABLE_OPTIMIZATION;set optimize_read_in_order=0;set optimize_aggregation_in_order=0;set optimize_read_in_window_order=0;explain plan sorting=1 select distinct a,b from distinct_in_order_explain" | eval $FIND_SORTING_PROPERTIES $CLICKHOUSE_CLIENT -q "drop table if exists distinct_in_order_explain sync" diff --git a/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql b/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql index f82f79dbe44..6491253cd5f 100644 --- a/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql +++ b/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql @@ -102,7 +102,7 @@ ALTER TABLE t_proj ADD PROJECTION p_1 (SELECT avg(a), avg(b), count()) SETTINGS INSERT INTO t_proj SELECT number + 1, number + 1 FROM numbers(1000); -DELETE FROM t_proj WHERE a < 100; -- { serverError NOT_IMPLEMENTED } +DELETE FROM t_proj WHERE a < 100; -- { serverError SUPPORT_IS_DISABLED } SELECT avg(a), avg(b), count() FROM t_proj; diff --git a/tests/queries/0_stateless/02325_dates_schema_inference.reference b/tests/queries/0_stateless/02325_dates_schema_inference.reference index c8eebd3262e..124f105220d 100644 --- a/tests/queries/0_stateless/02325_dates_schema_inference.reference +++ b/tests/queries/0_stateless/02325_dates_schema_inference.reference @@ -1,29 +1,29 @@ JSONEachRow x Nullable(Date) x Nullable(DateTime64(9)) -x Nullable(DateTime64(9)) +x Nullable(DateTime) x Array(Nullable(Date)) -x Array(Nullable(DateTime64(9))) -x Array(Nullable(DateTime64(9))) -x Tuple(\n date1 Nullable(DateTime64(9)),\n date2 Nullable(Date)) -x Array(Nullable(DateTime64(9))) -x Array(Nullable(DateTime64(9))) -x Nullable(DateTime64(9)) +x Array(Nullable(DateTime)) +x Array(Nullable(DateTime)) +x Tuple(\n date1 Nullable(DateTime),\n date2 Nullable(Date)) +x Array(Nullable(DateTime)) +x Array(Nullable(DateTime)) +x Nullable(DateTime) x Array(Nullable(String)) x Nullable(String) x Array(Nullable(String)) -x Tuple(\n key1 Array(Array(Nullable(DateTime64(9)))),\n key2 Array(Array(Nullable(String)))) +x Tuple(\n key1 Array(Array(Nullable(DateTime))),\n key2 Array(Array(Nullable(String)))) CSV c1 Nullable(Date) c1 Nullable(DateTime64(9)) -c1 Nullable(DateTime64(9)) +c1 Nullable(DateTime) c1 Array(Nullable(Date)) -c1 Array(Nullable(DateTime64(9))) -c1 Array(Nullable(DateTime64(9))) -c1 Map(String, Nullable(DateTime64(9))) -c1 Array(Nullable(DateTime64(9))) -c1 Array(Nullable(DateTime64(9))) -c1 Nullable(DateTime64(9)) +c1 Array(Nullable(DateTime)) +c1 Array(Nullable(DateTime)) +c1 Map(String, Nullable(DateTime)) +c1 Array(Nullable(DateTime)) +c1 Array(Nullable(DateTime)) +c1 Nullable(DateTime) c1 Array(Nullable(String)) c1 Nullable(String) c1 Array(Nullable(String)) @@ -31,14 +31,14 @@ c1 Map(String, Array(Array(Nullable(String)))) TSV c1 Nullable(Date) c1 Nullable(DateTime64(9)) -c1 Nullable(DateTime64(9)) +c1 Nullable(DateTime) c1 Array(Nullable(Date)) -c1 Array(Nullable(DateTime64(9))) -c1 Array(Nullable(DateTime64(9))) -c1 Map(String, Nullable(DateTime64(9))) -c1 Array(Nullable(DateTime64(9))) -c1 Array(Nullable(DateTime64(9))) -c1 Nullable(DateTime64(9)) +c1 Array(Nullable(DateTime)) +c1 Array(Nullable(DateTime)) +c1 Map(String, Nullable(DateTime)) +c1 Array(Nullable(DateTime)) +c1 Array(Nullable(DateTime)) +c1 Nullable(DateTime) c1 Array(Nullable(String)) c1 Nullable(String) c1 Array(Nullable(String)) @@ -46,14 +46,14 @@ c1 Map(String, Array(Array(Nullable(String)))) Values c1 Nullable(Date) c1 Nullable(DateTime64(9)) -c1 Nullable(DateTime64(9)) +c1 Nullable(DateTime) c1 Array(Nullable(Date)) -c1 Array(Nullable(DateTime64(9))) -c1 Array(Nullable(DateTime64(9))) -c1 Map(String, Nullable(DateTime64(9))) -c1 Array(Nullable(DateTime64(9))) -c1 Array(Nullable(DateTime64(9))) -c1 Nullable(DateTime64(9)) +c1 Array(Nullable(DateTime)) +c1 Array(Nullable(DateTime)) +c1 Map(String, Nullable(DateTime)) +c1 Array(Nullable(DateTime)) +c1 Array(Nullable(DateTime)) +c1 Nullable(DateTime) c1 Array(Nullable(String)) c1 Nullable(String) c1 Array(Nullable(String)) diff --git a/tests/queries/0_stateless/02335_column_ttl_expired_column_optimization.sh b/tests/queries/0_stateless/02335_column_ttl_expired_column_optimization.sh index 96f80d65878..490f8361682 100755 --- a/tests/queries/0_stateless/02335_column_ttl_expired_column_optimization.sh +++ b/tests/queries/0_stateless/02335_column_ttl_expired_column_optimization.sh @@ -6,7 +6,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) data_path="$CLICKHOUSE_TMP/local" -$CLICKHOUSE_LOCAL --path "$data_path" -nm -q " +$CLICKHOUSE_LOCAL --path "$data_path" -m -q " create table ttl_02335 ( date Date, key Int, diff --git a/tests/queries/0_stateless/02343_aggregation_pipeline.reference b/tests/queries/0_stateless/02343_aggregation_pipeline.reference index bf61eb6da0a..eb013200a17 100644 --- a/tests/queries/0_stateless/02343_aggregation_pipeline.reference +++ b/tests/queries/0_stateless/02343_aggregation_pipeline.reference @@ -1,6 +1,6 @@ -- { echoOn } -explain pipeline select * from (select * from numbers(1e8) group by number) group by number; +explain pipeline select * from (select * from numbers(1e8) group by number) group by number settings max_rows_to_read = 0; (Expression) ExpressionTransform × 16 (Aggregating) @@ -16,7 +16,7 @@ ExpressionTransform × 16 ExpressionTransform (ReadFromSystemNumbers) NumbersRange 0 → 1 -explain pipeline select * from (select * from numbers_mt(1e8) group by number) group by number; +explain pipeline select * from (select * from numbers_mt(1e8) group by number) group by number settings max_rows_to_read = 0; (Expression) ExpressionTransform × 16 (Aggregating) @@ -32,7 +32,7 @@ ExpressionTransform × 16 ExpressionTransform × 16 (ReadFromSystemNumbers) NumbersRange × 16 0 → 1 -explain pipeline select * from (select * from numbers_mt(1e8) group by number) order by number; +explain pipeline select * from (select * from numbers_mt(1e8) group by number) order by number settings max_rows_to_read = 0; (Expression) ExpressionTransform (Sorting) diff --git a/tests/queries/0_stateless/02343_aggregation_pipeline.sql b/tests/queries/0_stateless/02343_aggregation_pipeline.sql index 0f9dbd0247d..24d54293313 100644 --- a/tests/queries/0_stateless/02343_aggregation_pipeline.sql +++ b/tests/queries/0_stateless/02343_aggregation_pipeline.sql @@ -13,11 +13,9 @@ set allow_prefetched_read_pool_for_local_filesystem = 0; -- { echoOn } -explain pipeline select * from (select * from numbers(1e8) group by number) group by number; - -explain pipeline select * from (select * from numbers_mt(1e8) group by number) group by number; - -explain pipeline select * from (select * from numbers_mt(1e8) group by number) order by number; +explain pipeline select * from (select * from numbers(1e8) group by number) group by number settings max_rows_to_read = 0; +explain pipeline select * from (select * from numbers_mt(1e8) group by number) group by number settings max_rows_to_read = 0; +explain pipeline select * from (select * from numbers_mt(1e8) group by number) order by number settings max_rows_to_read = 0; explain pipeline select number from remote('127.0.0.{1,2,3}', system, numbers_mt) group by number settings distributed_aggregation_memory_efficient = 1; diff --git a/tests/queries/0_stateless/02344_describe_cache.reference b/tests/queries/0_stateless/02344_describe_cache.reference index 6895606eb2b..13429b14866 100644 --- a/tests/queries/0_stateless/02344_describe_cache.reference +++ b/tests/queries/0_stateless/02344_describe_cache.reference @@ -1,2 +1,2 @@ 1 -102400 10000000 33554432 4194304 0 0 0 0 /var/lib/clickhouse/filesystem_caches/02344_describe_cache_test 0 5000 0 16 +102400 10000000 33554432 1 4194304 0 0 0 0 /var/lib/clickhouse/filesystem_caches/02344_describe_cache_test 0 5000 0 16 diff --git a/tests/queries/0_stateless/02344_describe_cache.sh b/tests/queries/0_stateless/02344_describe_cache.sh index d91661db9bc..c5373b4d7e3 100755 --- a/tests/queries/0_stateless/02344_describe_cache.sh +++ b/tests/queries/0_stateless/02344_describe_cache.sh @@ -11,7 +11,7 @@ $CLICKHOUSE_CLIENT -nm --query """ DROP TABLE IF EXISTS test; CREATE TABLE test (a Int32, b String) ENGINE = MergeTree() ORDER BY tuple() -SETTINGS disk = disk(name = '$disk_name', type = cache, max_size = '100Ki', path = '$disk_name', disk = 's3_disk'); +SETTINGS disk = disk(name = '$disk_name', type = cache, max_size = '100Ki', path = '$disk_name', disk = 's3_disk', load_metadata_asynchronously = 0); """ $CLICKHOUSE_CLIENT -nm --query """ diff --git a/tests/queries/0_stateless/02344_insert_profile_events_stress.sql b/tests/queries/0_stateless/02344_insert_profile_events_stress.sql index e9a790bea5d..902e1da543c 100644 --- a/tests/queries/0_stateless/02344_insert_profile_events_stress.sql +++ b/tests/queries/0_stateless/02344_insert_profile_events_stress.sql @@ -1,4 +1,5 @@ -- Tags: no-parallel, long, no-debug, no-tsan, no-msan, no-asan +SET max_rows_to_read = 0; create table data_02344 (key Int) engine=Null; -- 3e9 rows is enough to fill the socket buffer and cause INSERT hung. diff --git a/tests/queries/0_stateless/02345_implicit_transaction.sql b/tests/queries/0_stateless/02345_implicit_transaction.sql index ee2e0a07c3e..9496de71e13 100644 --- a/tests/queries/0_stateless/02345_implicit_transaction.sql +++ b/tests/queries/0_stateless/02345_implicit_transaction.sql @@ -3,7 +3,7 @@ CREATE TABLE landing (n Int64) engine=MergeTree order by n; CREATE TABLE target (n Int64) engine=MergeTree order by n; CREATE MATERIALIZED VIEW landing_to_target TO target AS - SELECT n + throwIf(n == 3333) + SELECT n + throwIf(n == 3333) AS n FROM landing; INSERT INTO landing SELECT * FROM numbers(10000); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } diff --git a/tests/queries/0_stateless/02346_additional_filters.sql b/tests/queries/0_stateless/02346_additional_filters.sql index f6b665713ec..5a799e1c8c1 100644 --- a/tests/queries/0_stateless/02346_additional_filters.sql +++ b/tests/queries/0_stateless/02346_additional_filters.sql @@ -4,6 +4,8 @@ drop table if exists table_2; drop table if exists v_numbers; drop table if exists mv_table; +SET max_rows_to_read = 0; + create table table_1 (x UInt32, y String) engine = MergeTree order by x; insert into table_1 values (1, 'a'), (2, 'bb'), (3, 'ccc'), (4, 'dddd'); diff --git a/tests/queries/0_stateless/02353_simdjson_buffer_overflow.sql b/tests/queries/0_stateless/02353_simdjson_buffer_overflow.sql index b324f834053..e7c6c272102 100644 --- a/tests/queries/0_stateless/02353_simdjson_buffer_overflow.sql +++ b/tests/queries/0_stateless/02353_simdjson_buffer_overflow.sql @@ -2,5 +2,6 @@ SET max_execution_time = 3; SET timeout_overflow_mode = 'break'; +SET max_rows_to_read = 0, max_bytes_to_read = 0; SELECT count() FROM system.numbers_mt WHERE NOT ignore(JSONExtract('{' || repeat('"a":"b",', rand() % 10) || '"c":"d"}', 'a', 'String')) FORMAT Null; diff --git a/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql b/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql index 105fb500461..f9da5b3a73c 100644 --- a/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql +++ b/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql @@ -1,5 +1,7 @@ -- Tags: long, no-tsan, no-msan, no-asan, no-ubsan, no-debug, no-object-storage +SET max_rows_to_read = '101M'; + DROP TABLE IF EXISTS t_2354_dist_with_external_aggr; create table t_2354_dist_with_external_aggr(a UInt64, b String, c FixedString(100)) engine = MergeTree order by tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; @@ -23,6 +25,6 @@ select a, b, c, sum(a) as s from remote('127.0.0.{2,3}', currentDatabase(), t_2354_dist_with_external_aggr) group by a, b, c format Null -settings max_memory_usage = '5Gi'; +settings max_memory_usage = '5Gi', max_result_rows = 0, max_result_bytes = 0; DROP TABLE t_2354_dist_with_external_aggr; diff --git a/tests/queries/0_stateless/02354_vector_search_bugs.reference b/tests/queries/0_stateless/02354_vector_search_bugs.reference index d2c2d7e2fb7..8da05c8a7c0 100644 --- a/tests/queries/0_stateless/02354_vector_search_bugs.reference +++ b/tests/queries/0_stateless/02354_vector_search_bugs.reference @@ -1,17 +1,6 @@ +Rejects INSERTs of Arrays with different sizes Issue #52258: Empty Arrays or Arrays with default values are rejected -- Annoy -- Usearch It is possible to create parts with different Array vector sizes but there will be an error at query time -- Annoy -- Usearch Correctness of index with > 1 mark -- Annoy 1 [1,0] 0 9000 [9000,0] 0 -1 (1,0) 0 -9000 (9000,0) 0 -- Usearch -1 [1,0] 0 -9000 [9000,0] 0 -1 (1,0) 0 -9000 (9000,0) 0 diff --git a/tests/queries/0_stateless/02354_vector_search_bugs.sql b/tests/queries/0_stateless/02354_vector_search_bugs.sql index f03c36f6550..51e2e6ce2b7 100644 --- a/tests/queries/0_stateless/02354_vector_search_bugs.sql +++ b/tests/queries/0_stateless/02354_vector_search_bugs.sql @@ -1,61 +1,28 @@ --- Tags: no-fasttest, no-ubsan, no-cpu-aarch64, no-ordinary-database, no-asan +-- Tags: no-fasttest, no-ordinary-database --- Tests vector search in ClickHouse, i.e. Annoy and Usearch indexes. Both index types share similarities in implementation and usage, --- therefore they are tested in a single file. - --- This file contains tests for various bugs and special cases - -SET allow_experimental_annoy_index = 1; -SET allow_experimental_usearch_index = 1; +-- Tests various bugs and special cases for vector indexes. +SET allow_experimental_vector_similarity_index = 1; SET enable_analyzer = 1; -- 0 vs. 1 produce slightly different error codes, make it future-proof DROP TABLE IF EXISTS tab; +SELECT 'Rejects INSERTs of Arrays with different sizes'; + +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance')) ENGINE = MergeTree ORDER BY id; +INSERT INTO tab values (0, [2.2, 2.3]) (1, [3.1, 3.2, 3.3]); -- { serverError INCORRECT_DATA } +DROP TABLE tab; + SELECT 'Issue #52258: Empty Arrays or Arrays with default values are rejected'; -SELECT '- Annoy'; - -CREATE TABLE tab (id UInt64, vec Array(Float32), INDEX idx vec TYPE annoy()) ENGINE = MergeTree() ORDER BY (id); +CREATE TABLE tab (id UInt64, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance')) ENGINE = MergeTree() ORDER BY id; INSERT INTO tab VALUES (1, []); -- { serverError INCORRECT_DATA } INSERT INTO tab (id) VALUES (1); -- { serverError INCORRECT_DATA } DROP TABLE tab; -CREATE TABLE tab (id UInt64, vec Tuple(Float32, Float32), INDEX idx vec TYPE annoy()) ENGINE = MergeTree() ORDER BY (id); -INSERT INTO tab (id) VALUES (1); -- works fine, takes on default tuple (0.0, 0.0) -DROP TABLE tab; - -SELECT '- Usearch'; - -CREATE TABLE tab (id UInt64, vec Array(Float32), INDEX idx vec TYPE usearch()) ENGINE = MergeTree() ORDER BY (id); -INSERT INTO tab VALUES (1, []); -- { serverError INCORRECT_DATA } -INSERT INTO tab (id) VALUES (1); -- { serverError INCORRECT_DATA } -DROP TABLE tab; - -CREATE TABLE tab (id UInt64, vec Tuple(Float32, Float32), INDEX idx vec TYPE usearch()) ENGINE = MergeTree() ORDER BY (id); -INSERT INTO tab (id) VALUES (1); -- works fine, takes on default tuple (0.0, 0.0) -DROP TABLE tab; - SELECT 'It is possible to create parts with different Array vector sizes but there will be an error at query time'; -SELECT '- Annoy'; - -CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE annoy()) ENGINE = MergeTree ORDER BY id; -SYSTEM STOP MERGES tab; -INSERT INTO tab values (0, [2.2, 2.3]) (1, [3.1, 3.2]); -INSERT INTO tab values (2, [2.2, 2.3, 2.4]) (3, [3.1, 3.2, 3.3]); - -WITH [0.0, 2.0] AS reference_vec -SELECT id, vec, L2Distance(vec, reference_vec) -FROM tab -ORDER BY L2Distance(vec, reference_vec) -LIMIT 3; -- { serverError SIZES_OF_ARRAYS_DONT_MATCH } - -DROP TABLE tab; - -SELECT '- Usearch'; - -CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE usearch()) ENGINE = MergeTree ORDER BY id; +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance')) ENGINE = MergeTree ORDER BY id; SYSTEM STOP MERGES tab; INSERT INTO tab values (0, [2.2, 2.3]) (1, [3.1, 3.2]); INSERT INTO tab values (2, [2.2, 2.3, 2.4]) (3, [3.1, 3.2, 3.3]); @@ -70,9 +37,7 @@ DROP TABLE tab; SELECT 'Correctness of index with > 1 mark'; -SELECT '- Annoy'; - -CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE annoy()) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity_bytes=0, min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, index_granularity=8192; -- disable adaptive granularity due to bug +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance')) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity_bytes = 0, min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, index_granularity = 8192; -- disable adaptive granularity due to bug INSERT INTO tab SELECT number, [toFloat32(number), 0.0] from numbers(10000); WITH [1.0, 0.0] AS reference_vec @@ -88,58 +53,3 @@ ORDER BY L2Distance(vec, reference_vec) LIMIT 1; DROP TABLE tab; - --- same, but with Tuples -CREATE TABLE tab(id Int32, vec Tuple(Float32, Float32), INDEX idx vec TYPE annoy()) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity_bytes=0, min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, index_granularity=8192; -- disable adaptive granularity due to bug -INSERT INTO tab SELECT number, (toFloat32(number), 0.0) from numbers(10000); - -WITH (1.0, 0.0) AS reference_vec -SELECT id, vec, L2Distance(vec, reference_vec) -FROM tab -ORDER BY L2Distance(vec, reference_vec) -LIMIT 1; - -WITH (9000.0, 0.0) AS reference_vec -SELECT id, vec, L2Distance(vec, reference_vec) -FROM tab -ORDER BY L2Distance(vec, reference_vec) -LIMIT 1; - -DROP TABLE tab; - -SELECT '- Usearch'; - -CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE usearch()) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity_bytes=0, min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, index_granularity=8192; -- disable adaptive granularity due to bug -INSERT INTO tab SELECT number, [toFloat32(number), 0.0] from numbers(10000); - -WITH [1.0, 0.0] AS reference_vec -SELECT id, vec, L2Distance(vec, reference_vec) -FROM tab -ORDER BY L2Distance(vec, reference_vec) -LIMIT 1; - -WITH [9000.0, 0.0] AS reference_vec -SELECT id, vec, L2Distance(vec, reference_vec) -FROM tab -ORDER BY L2Distance(vec, reference_vec) -LIMIT 1; - -DROP TABLE tab; - --- same, but with Tuples -CREATE TABLE tab(id Int32, vec Tuple(Float32, Float32), INDEX idx vec TYPE usearch()) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity_bytes=0, min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, index_granularity=8192; -- disable adaptive granularity due to bug -INSERT INTO tab SELECT number, (toFloat32(number), 0.0) from numbers(10000); - -WITH (1.0, 0.0) AS reference_vec -SELECT id, vec, L2Distance(vec, reference_vec) -FROM tab -ORDER BY L2Distance(vec, reference_vec) -LIMIT 1; - -WITH (9000.0, 0.0) AS reference_vec -SELECT id, vec, L2Distance(vec, reference_vec) -FROM tab -ORDER BY L2Distance(vec, reference_vec) -LIMIT 1; - -DROP TABLE tab; diff --git a/tests/queries/0_stateless/02354_vector_search_default_granularity.reference b/tests/queries/0_stateless/02354_vector_search_default_granularity.reference index 2f97ed72c52..ab3cc71ff6b 100644 --- a/tests/queries/0_stateless/02354_vector_search_default_granularity.reference +++ b/tests/queries/0_stateless/02354_vector_search_default_granularity.reference @@ -1,7 +1,2 @@ -Test the default index granularity for vector search indexes (CREATE TABLE AND ALTER TABLE), should be 100 million for Annoy and USearch -- Annoy -100000000 -100000000 -- Usearch 100000000 100000000 diff --git a/tests/queries/0_stateless/02354_vector_search_default_granularity.sql b/tests/queries/0_stateless/02354_vector_search_default_granularity.sql index f15554505f8..acb69cb6ff8 100644 --- a/tests/queries/0_stateless/02354_vector_search_default_granularity.sql +++ b/tests/queries/0_stateless/02354_vector_search_default_granularity.sql @@ -1,35 +1,18 @@ --- Tags: no-fasttest, no-ubsan, no-cpu-aarch64, no-ordinary-database, no-asan +-- Tags: no-fasttest, no-ordinary-database --- Tests vector search in ClickHouse, i.e. Annoy and Usearch indexes. Both index types share similarities in implementation and usage, --- therefore they are tested in a single file. +-- Tests that vector search indexes use a (non-standard) index granularity of 100 mio by default. --- This file contains tests for the non-standard default granularity of vector search indexes. - -SET allow_experimental_annoy_index = 1; -SET allow_experimental_usearch_index = 1; - -SELECT 'Test the default index granularity for vector search indexes (CREATE TABLE AND ALTER TABLE), should be 100 million for Annoy and USearch'; - -SELECT '- Annoy'; +SET allow_experimental_vector_similarity_index = 1; +-- After CREATE TABLE DROP TABLE IF EXISTS tab; -CREATE TABLE tab (id Int32, vec Array(Float32), INDEX idx(vec) TYPE annoy) ENGINE=MergeTree ORDER BY id; -SELECT granularity FROM system.data_skipping_indices WHERE database = currentDatabase() AND table = 'tab' AND name = 'idx'; - -DROP TABLE tab; -CREATE TABLE tab (id Int32, vec Array(Float32)) ENGINE=MergeTree ORDER BY id; -ALTER TABLE tab ADD INDEX idx(vec) TYPE annoy; -SELECT granularity FROM system.data_skipping_indices WHERE database = currentDatabase() AND table = 'tab' AND name = 'idx'; - -SELECT '- Usearch'; - -DROP TABLE tab; -CREATE TABLE tab (id Int32, vec Array(Float32), INDEX idx(vec) TYPE usearch) ENGINE=MergeTree ORDER BY id; -SELECT granularity FROM system.data_skipping_indices WHERE database = currentDatabase() AND table = 'tab' AND name = 'idx'; - -DROP TABLE tab; -CREATE TABLE tab (id Int32, vec Array(Float32)) ENGINE=MergeTree ORDER BY id; -ALTER TABLE tab ADD INDEX idx(vec) TYPE usearch; +CREATE TABLE tab (id Int32, vec Array(Float32), INDEX idx(vec) TYPE vector_similarity('hnsw', 'L2Distance')) ENGINE = MergeTree ORDER BY id; +SELECT granularity FROM system.data_skipping_indices WHERE database = currentDatabase() AND table = 'tab' AND name = 'idx'; + +-- After ALTER TABLE +DROP TABLE tab; +CREATE TABLE tab (id Int32, vec Array(Float32)) ENGINE = MergeTree ORDER BY id; +ALTER TABLE tab ADD INDEX idx(vec) TYPE vector_similarity('hnsw', 'L2Distance'); SELECT granularity FROM system.data_skipping_indices WHERE database = currentDatabase() AND table = 'tab' AND name = 'idx'; DROP TABLE tab; diff --git a/tests/queries/0_stateless/02354_vector_search_detach_attach.reference b/tests/queries/0_stateless/02354_vector_search_detach_attach.reference new file mode 100644 index 00000000000..80eb091922e --- /dev/null +++ b/tests/queries/0_stateless/02354_vector_search_detach_attach.reference @@ -0,0 +1,3 @@ +5 [0,2] 0 +6 [0,2.1] 0.09999990463256836 +7 [0,2.2] 0.20000004768371582 diff --git a/tests/queries/0_stateless/02354_vector_search_detach_attach.sql b/tests/queries/0_stateless/02354_vector_search_detach_attach.sql new file mode 100644 index 00000000000..f92eaddbbed --- /dev/null +++ b/tests/queries/0_stateless/02354_vector_search_detach_attach.sql @@ -0,0 +1,20 @@ +-- Tags: no-fasttest, no-ordinary-database + +-- Tests that vector similarity indexes can be detached/attached. + +SET allow_experimental_vector_similarity_index = 1; + +DROP TABLE IF EXISTS tab; +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance')) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 8192; +INSERT INTO tab VALUES (0, [1.0, 0.0]), (1, [1.1, 0.0]), (2, [1.2, 0.0]), (3, [1.3, 0.0]), (4, [1.4, 0.0]), (5, [0.0, 2.0]), (6, [0.0, 2.1]), (7, [0.0, 2.2]), (8, [0.0, 2.3]), (9, [0.0, 2.4]); + +DETACH TABLE tab SYNC; +ATTACH TABLE tab; + +WITH [0.0, 2.0] AS reference_vec +SELECT id, vec, L2Distance(vec, reference_vec) +FROM tab +ORDER BY L2Distance(vec, reference_vec) +LIMIT 3; + +DROP TABLE tab; diff --git a/tests/queries/0_stateless/02354_vector_search_index_creation_negative.reference b/tests/queries/0_stateless/02354_vector_search_index_creation_negative.reference index 43bc49e8adc..5963f4b5834 100644 --- a/tests/queries/0_stateless/02354_vector_search_index_creation_negative.reference +++ b/tests/queries/0_stateless/02354_vector_search_index_creation_negative.reference @@ -1,8 +1,7 @@ -At most two index arguments -1st argument (distance function) must be String -Rejects unsupported distance functions -2nd argument (Annoy: number of trees, USearch: scalar kind) must be UInt64 (Annoy) / String (Usearch) -Rejects unsupported scalar kinds (only Usearch) +Two or six index arguments +1st argument (method) must be String and hnsw +2nd argument (distance function) must be String and L2Distance or cosineDistance +3nd argument (quantization), if given, must be String and f32, f16, ... +4nd argument (M), if given, must be UInt64 and > 1 Must be created on single column -Must be created on Array(Float32) or Tuple(Float32, Float, ...) columns -Rejects INSERTs of Arrays with different sizes +Must be created on Array(Float32) columns diff --git a/tests/queries/0_stateless/02354_vector_search_index_creation_negative.sql b/tests/queries/0_stateless/02354_vector_search_index_creation_negative.sql index 6a4d6448629..e8e6aaee1b2 100644 --- a/tests/queries/0_stateless/02354_vector_search_index_creation_negative.sql +++ b/tests/queries/0_stateless/02354_vector_search_index_creation_negative.sql @@ -1,63 +1,40 @@ --- Tags: no-fasttest, no-ubsan, no-cpu-aarch64, no-ordinary-database, no-asan +-- Tags: no-fasttest, no-ordinary-database --- Tests vector search in ClickHouse, i.e. Annoy and Usearch indexes. Both index types share similarities in implementation and usage, --- therefore they are tested in a single file. +-- Tests that various conditions are checked during creation of vector search indexes. --- This file tests that various conditions are checked during creation of vector search indexes. - -SET allow_experimental_annoy_index = 1; -SET allow_experimental_usearch_index = 1; +SET allow_experimental_vector_similarity_index = 1; DROP TABLE IF EXISTS tab; -SELECT 'At most two index arguments'; -CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE annoy('too', 'many', 'arguments')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY } -CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE usearch('too', 'many', 'args')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY } +SELECT 'Two or six index arguments'; +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY } +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity()) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY } +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('cant_have_one_arg')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY } +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('cant', 'have', 'three_args')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY } +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('cant', 'have', 'more', 'than', 'six', 'args', '!')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY } -SELECT '1st argument (distance function) must be String'; -CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE annoy(3)) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY } -CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE usearch(3)) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY } +SELECT '1st argument (method) must be String and hnsw'; +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity(3, 'L2Distance')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY } +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('not_hnsw', 'L2Distance')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_DATA } -SELECT 'Rejects unsupported distance functions'; -CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE annoy('invalidDistance')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_DATA } -CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE usearch('invalidDistance')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_DATA } +SELECT '2nd argument (distance function) must be String and L2Distance or cosineDistance'; +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 3)) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY } +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'invalid_distance')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_DATA } -SELECT '2nd argument (Annoy: number of trees, USearch: scalar kind) must be UInt64 (Annoy) / String (Usearch)'; -CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE annoy('L2Distance', 'not an UInt64')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY } -CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE usearch(3)) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY } - -SELECT 'Rejects unsupported scalar kinds (only Usearch)'; -CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE usearch('L2Distance', 'invalidKind')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_DATA } +SELECT '3nd argument (quantization), if given, must be String and f32, f16, ...'; +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance', 1, 1, 1, 1)) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY } +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance', 'invalid', 2, 1, 1)) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_DATA } +SELECT '4nd argument (M), if given, must be UInt64 and > 1'; +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance', 'f32', 'invalid', 1, 1)) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY } +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance', 'f32', 1, 1, 1)) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_DATA } SELECT 'Must be created on single column'; -CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx (vec, id) TYPE annoy()) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_NUMBER_OF_COLUMNS } -CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx (vec, id) TYPE usearch()) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_NUMBER_OF_COLUMNS } - -SELECT 'Must be created on Array(Float32) or Tuple(Float32, Float, ...) columns'; +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx (vec, id) TYPE vector_similarity('hnsw', 'L2Distance')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_NUMBER_OF_COLUMNS } +SELECT 'Must be created on Array(Float32) columns'; SET allow_suspicious_low_cardinality_types = 1; - -CREATE TABLE tab(id Int32, vec Float32, INDEX idx vec TYPE annoy()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN } -CREATE TABLE tab(id Int32, vec Float32, INDEX idx vec TYPE usearch()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN } - -CREATE TABLE tab(id Int32, vec Array(Float64), INDEX idx vec TYPE annoy()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN } -CREATE TABLE tab(id Int32, vec Array(Float64), INDEX idx vec TYPE usearch()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN } - -CREATE TABLE tab(id Int32, vec Tuple(Float64), INDEX idx vec TYPE annoy()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN } -CREATE TABLE tab(id Int32, vec Tuple(Float64), INDEX idx vec TYPE usearch()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN } - -CREATE TABLE tab(id Int32, vec LowCardinality(Float32), INDEX idx vec TYPE annoy()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN } -CREATE TABLE tab(id Int32, vec LowCardinality(Float32), INDEX idx vec TYPE usearch()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN } - -CREATE TABLE tab(id Int32, vec Nullable(Float32), INDEX idx vec TYPE annoy()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN } -CREATE TABLE tab(id Int32, vec Nullable(Float32), INDEX idx vec TYPE usearch()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN } - -SELECT 'Rejects INSERTs of Arrays with different sizes'; - -CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE annoy()) ENGINE = MergeTree ORDER BY id; -INSERT INTO tab values (0, [2.2, 2.3]) (1, [3.1, 3.2, 3.3]); -- { serverError INCORRECT_DATA } -DROP TABLE tab; - -CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE usearch()) ENGINE = MergeTree ORDER BY id; -INSERT INTO tab values (0, [2.2, 2.3]) (1, [3.1, 3.2, 3.3]); -- { serverError INCORRECT_DATA } -DROP TABLE tab; +CREATE TABLE tab(id Int32, vec UInt64, INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance')) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN } +CREATE TABLE tab(id Int32, vec Float32, INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance')) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN } +CREATE TABLE tab(id Int32, vec Array(UInt64), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance')) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN } +CREATE TABLE tab(id Int32, vec LowCardinality(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance')) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN } +CREATE TABLE tab(id Int32, vec Nullable(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance')) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN } diff --git a/tests/queries/0_stateless/02354_vector_search_legacy_index_compatibility.reference b/tests/queries/0_stateless/02354_vector_search_legacy_index_compatibility.reference new file mode 100644 index 00000000000..030bfa9b1bd --- /dev/null +++ b/tests/queries/0_stateless/02354_vector_search_legacy_index_compatibility.reference @@ -0,0 +1,2 @@ +Annoy +Usearch diff --git a/tests/queries/0_stateless/02354_vector_search_legacy_index_compatibility.sql b/tests/queries/0_stateless/02354_vector_search_legacy_index_compatibility.sql new file mode 100644 index 00000000000..0889aa74f7a --- /dev/null +++ b/tests/queries/0_stateless/02354_vector_search_legacy_index_compatibility.sql @@ -0,0 +1,43 @@ +-- Indexes of type 'annoy' or 'usearch' are no longer supported. +-- Test what happens when ClickHouse encounters tables with the old index type. + +DROP TABLE IF EXISTS tab; + +SELECT 'Annoy'; + +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX vec_idx vec TYPE annoy()) ENGINE = MergeTree ORDER BY id; + +INSERT INTO tab VALUES (0, [1.0, 0.0]), (1, [1.1, 0.0]), (2, [1.2, 0.0]), (3, [1.3, 0.0]), (4, [1.4, 0.0]), (5, [0.0, 2.0]), (6, [0.0, 2.1]), (7, [0.0, 2.2]), (8, [0.0, 2.3]), (9, [0.0, 2.4]); -- { serverError ILLEGAL_INDEX } + +WITH [0.0, 2.0] AS reference_vec +SELECT id, vec, L2Distance(vec, reference_vec) +FROM tab +ORDER BY L2Distance(vec, reference_vec) +LIMIT 3; +-- (*) The search succeeds because the index contains no data (i.e. some shortcut) +-- If it had data (can't really test in SQL tests ...), this statement would also return an error, trust me. + +-- Detach and attach should work. +DETACH TABLE tab; +ATTACH TABLE tab; + +DROP TABLE tab; + +SELECT 'Usearch'; + +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX vec_idx vec TYPE usearch()) ENGINE = MergeTree ORDER BY id; + +INSERT INTO tab VALUES (0, [1.0, 0.0]), (1, [1.1, 0.0]), (2, [1.2, 0.0]), (3, [1.3, 0.0]), (4, [1.4, 0.0]), (5, [0.0, 2.0]), (6, [0.0, 2.1]), (7, [0.0, 2.2]), (8, [0.0, 2.3]), (9, [0.0, 2.4]); -- { serverError ILLEGAL_INDEX } + +WITH [0.0, 2.0] AS reference_vec +SELECT id, vec, L2Distance(vec, reference_vec) +FROM tab +ORDER BY L2Distance(vec, reference_vec) +LIMIT 3; +-- see above: (*) + +-- Detach and attach should work. +DETACH TABLE tab; +ATTACH TABLE tab; + +DROP TABLE tab; diff --git a/tests/queries/0_stateless/02354_vector_search_multiple_indexes.reference b/tests/queries/0_stateless/02354_vector_search_multiple_indexes.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02354_vector_search_multiple_indexes.sql b/tests/queries/0_stateless/02354_vector_search_multiple_indexes.sql new file mode 100644 index 00000000000..f1cfc041233 --- /dev/null +++ b/tests/queries/0_stateless/02354_vector_search_multiple_indexes.sql @@ -0,0 +1,14 @@ +-- Tags: no-fasttest, no-ordinary-database + +-- Tests that multiple vector similarity indexes can be created on the same column (even if that makes no sense) + +SET allow_experimental_vector_similarity_index = 1; + +DROP TABLE IF EXISTS tab; +CREATE TABLE tab (id Int32, vec Array(Float32), PRIMARY KEY id, INDEX vec_idx(vec) TYPE vector_similarity('hnsw', 'L2Distance')); + +ALTER TABLE tab ADD INDEX idx(vec) TYPE minmax; +ALTER TABLE tab ADD INDEX vec_idx1(vec) TYPE vector_similarity('hnsw', 'cosineDistance'); +ALTER TABLE tab ADD INDEX vec_idx2(vec) TYPE vector_similarity('hnsw', 'L2Distance'); -- silly but creating the same index also works for non-vector indexes ... + +DROP TABLE tab; diff --git a/tests/queries/0_stateless/02354_vector_search_queries.reference b/tests/queries/0_stateless/02354_vector_search_queries.reference index 41c1915ecc3..cb3a8c801b1 100644 --- a/tests/queries/0_stateless/02354_vector_search_queries.reference +++ b/tests/queries/0_stateless/02354_vector_search_queries.reference @@ -1,41 +1,12 @@ -ARRAY, 10 rows, index_granularity = 8192, GRANULARITY = 1 million --> 1 granule, 1 indexed block -- Annoy: WHERE-type +10 rows, index_granularity = 8192, GRANULARITY = 1 million --> 1 granule, 1 indexed block 5 [0,2] 0 6 [0,2.1] 0.09999990463256836 7 [0,2.2] 0.20000004768371582 -- Annoy: ORDER-BY-type -5 [0,2] 0 -6 [0,2.1] 0.09999990463256836 -7 [0,2.2] 0.20000004768371582 -- Usearch: WHERE-type -5 [0,2] 0 -6 [0,2.1] 0.09999990463256836 -7 [0,2.2] 0.20000004768371582 -- Usearch: ORDER-BY-type -5 [0,2] 0 -6 [0,2.1] 0.09999990463256836 -7 [0,2.2] 0.20000004768371582 -- Annoy: WHERE-type, EXPLAIN -Expression ((Projection + Before ORDER BY)) - Limit (preliminary LIMIT (without OFFSET)) - Expression - ReadFromMergeTree (default.tab_annoy) - Indexes: - PrimaryKey - Condition: true - Parts: 1/1 - Granules: 1/1 - Skip - Name: idx - Description: annoy GRANULARITY 100000000 - Parts: 1/1 - Granules: 1/1 -- Annoy: ORDER-BY-type, EXPLAIN Expression (Projection) Limit (preliminary LIMIT (without OFFSET)) Sorting (Sorting for ORDER BY) Expression (Before ORDER BY) - ReadFromMergeTree (default.tab_annoy) + ReadFromMergeTree (default.tab) Indexes: PrimaryKey Condition: true @@ -43,78 +14,18 @@ Expression (Projection) Granules: 1/1 Skip Name: idx - Description: annoy GRANULARITY 100000000 + Description: vector_similarity GRANULARITY 100000000 Parts: 1/1 Granules: 1/1 -- Usearch: WHERE-type, EXPLAIN -Expression ((Projection + Before ORDER BY)) - Limit (preliminary LIMIT (without OFFSET)) - Expression - ReadFromMergeTree (default.tab_usearch) - Indexes: - PrimaryKey - Condition: true - Parts: 1/1 - Granules: 1/1 - Skip - Name: idx - Description: usearch GRANULARITY 100000000 - Parts: 1/1 - Granules: 1/1 -- Usearch: ORDER-BY-type, EXPLAIN +12 rows, index_granularity = 3, GRANULARITY = 2 --> 4 granules, 2 indexed block +6 [0,2] 0 +7 [0,2.1] 0.09999990463256836 +8 [0,2.2] 0.20000004768371582 Expression (Projection) Limit (preliminary LIMIT (without OFFSET)) Sorting (Sorting for ORDER BY) Expression (Before ORDER BY) - ReadFromMergeTree (default.tab_usearch) - Indexes: - PrimaryKey - Condition: true - Parts: 1/1 - Granules: 1/1 - Skip - Name: idx - Description: usearch GRANULARITY 100000000 - Parts: 1/1 - Granules: 1/1 -ARRAY vectors, 12 rows, index_granularity = 3, GRANULARITY = 2 --> 4 granules, 2 indexed block -- Annoy: WHERE-type -6 [0,2] 0 -7 [0,2.1] 0.09999990463256836 -8 [0,2.2] 0.20000004768371582 -- Annoy: ORDER-BY-type -6 [0,2] 0 -7 [0,2.1] 0.09999990463256836 -8 [0,2.2] 0.20000004768371582 -- Usearch: WHERE-type -6 [0,2] 0 -7 [0,2.1] 0.09999990463256836 -8 [0,2.2] 0.20000004768371582 -- Usearch: ORDER-BY-type -6 [0,2] 0 -7 [0,2.1] 0.09999990463256836 -8 [0,2.2] 0.20000004768371582 -- Annoy: WHERE-type, EXPLAIN -Expression ((Projection + Before ORDER BY)) - Limit (preliminary LIMIT (without OFFSET)) - Expression - ReadFromMergeTree (default.tab_annoy) - Indexes: - PrimaryKey - Condition: true - Parts: 1/1 - Granules: 4/4 - Skip - Name: idx - Description: annoy GRANULARITY 2 - Parts: 1/1 - Granules: 1/4 -- Annoy: ORDER-BY-type, EXPLAIN -Expression (Projection) - Limit (preliminary LIMIT (without OFFSET)) - Sorting (Sorting for ORDER BY) - Expression (Before ORDER BY) - ReadFromMergeTree (default.tab_annoy) + ReadFromMergeTree (default.tab) Indexes: PrimaryKey Condition: true @@ -122,30 +33,34 @@ Expression (Projection) Granules: 4/4 Skip Name: idx - Description: annoy GRANULARITY 2 + Description: vector_similarity GRANULARITY 2 Parts: 1/1 Granules: 2/4 -- Usearch: WHERE-type, EXPLAIN -Expression ((Projection + Before ORDER BY)) - Limit (preliminary LIMIT (without OFFSET)) - Expression - ReadFromMergeTree (default.tab_usearch) - Indexes: - PrimaryKey - Condition: true - Parts: 1/1 - Granules: 4/4 - Skip - Name: idx - Description: usearch GRANULARITY 2 - Parts: 1/1 - Granules: 1/4 -- Usearch: ORDER-BY-type, EXPLAIN +Special cases +-- Non-default metric, M, ef_construction, ef_search +6 [1,9.3] 0.005731362878640178 +1 [2,3.2] 0.15200169244542905 +7 [5.5,4.7] 0.3503476876550442 +-- Setting "max_limit_for_ann_queries" Expression (Projection) Limit (preliminary LIMIT (without OFFSET)) Sorting (Sorting for ORDER BY) Expression (Before ORDER BY) - ReadFromMergeTree (default.tab_usearch) + ReadFromMergeTree (default.tab) + Indexes: + PrimaryKey + Condition: true + Parts: 1/1 + Granules: 4/4 +-- Non-default quantization +1 [2,3.2] 2.3323807824711897 +2 [4.2,3.4] 4.427188573446585 +0 [4.6,2.3] 4.609772130377966 +Expression (Projection) + Limit (preliminary LIMIT (without OFFSET)) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + ReadFromMergeTree (default.tab_f32) Indexes: PrimaryKey Condition: true @@ -153,48 +68,46 @@ Expression (Projection) Granules: 4/4 Skip Name: idx - Description: usearch GRANULARITY 2 + Description: vector_similarity GRANULARITY 2 Parts: 1/1 Granules: 2/4 -TUPLE vectors and special cases -- Annoy: WHERE-type -0 (4.6,2.3) 0.5527864045000421 -1 (2,3.2) 0.15200169244542905 -2 (4.2,3.4) 0.37080174340866845 -- Annoy: ORDER-BY-type -6 (1,9.3) 0.005731362878640178 -1 (2,3.2) 0.15200169244542905 -7 (5.5,4.7) 0.3503476876550442 -- Usearch: WHERE-type -0 (4.6,2.3) 0.5527864045000421 -1 (2,3.2) 0.15200169244542905 -2 (4.2,3.4) 0.37080174340866845 -- Usearch: ORDER-BY-type -6 (1,9.3) 0.005731362878640178 -1 (2,3.2) 0.15200169244542905 -7 (5.5,4.7) 0.3503476876550442 -- Special case: MaximumDistance is negative -- Special case: MaximumDistance is negative -- Special case: setting "annoy_index_search_k_nodes" -- Special case: setting "max_limit_for_ann_queries" +1 [2,3.2] 2.3323807824711897 +2 [4.2,3.4] 4.427188573446585 +0 [4.6,2.3] 4.609772130377966 Expression (Projection) Limit (preliminary LIMIT (without OFFSET)) Sorting (Sorting for ORDER BY) Expression (Before ORDER BY) - ReadFromMergeTree (default.tab_annoy) + ReadFromMergeTree (default.tab_f16) Indexes: PrimaryKey Condition: true Parts: 1/1 Granules: 4/4 -- Special case: setting "max_limit_for_ann_queries" + Skip + Name: idx + Description: vector_similarity GRANULARITY 2 + Parts: 1/1 + Granules: 2/4 +1 [2,3.2] 2.3323807824711897 +2 [4.2,3.4] 4.427188573446585 +0 [4.6,2.3] 4.609772130377966 Expression (Projection) Limit (preliminary LIMIT (without OFFSET)) Sorting (Sorting for ORDER BY) Expression (Before ORDER BY) - ReadFromMergeTree (default.tab_usearch) + ReadFromMergeTree (default.tab_i8) Indexes: PrimaryKey Condition: true Parts: 1/1 Granules: 4/4 + Skip + Name: idx + Description: vector_similarity GRANULARITY 2 + Parts: 1/1 + Granules: 2/4 +-- Index on Array(Float64) column +6 [0,2] 0 +7 [0,2.1] 0.10000000000000009 +8 [0,2.2] 0.20000000000000018 diff --git a/tests/queries/0_stateless/02354_vector_search_queries.sql b/tests/queries/0_stateless/02354_vector_search_queries.sql index 64051aa8544..fbf8427d8fe 100644 --- a/tests/queries/0_stateless/02354_vector_search_queries.sql +++ b/tests/queries/0_stateless/02354_vector_search_queries.sql @@ -1,238 +1,138 @@ --- Tags: no-fasttest, no-ubsan, no-cpu-aarch64, no-ordinary-database, no-asan +-- Tags: no-fasttest, no-ordinary-database --- Tests vector search in ClickHouse, i.e. Annoy and Usearch indexes. Both index types share similarities in implementation and usage, --- therefore they are tested in a single file. +-- Tests various simple approximate nearest neighborhood (ANN) queries that utilize vector search indexes. --- This file tests various simple approximate nearest neighborhood (ANN) queries that utilize vector search indexes. +SET allow_experimental_vector_similarity_index = 1; -SET allow_experimental_annoy_index = 1; -SET allow_experimental_usearch_index = 1; +SET enable_analyzer = 0; -SELECT 'ARRAY, 10 rows, index_granularity = 8192, GRANULARITY = 1 million --> 1 granule, 1 indexed block'; +SELECT '10 rows, index_granularity = 8192, GRANULARITY = 1 million --> 1 granule, 1 indexed block'; -DROP TABLE IF EXISTS tab_annoy; -DROP TABLE IF EXISTS tab_usearch; +DROP TABLE IF EXISTS tab; -CREATE TABLE tab_annoy(id Int32, vec Array(Float32), INDEX idx vec TYPE annoy()) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 8192; -INSERT INTO tab_annoy VALUES (0, [1.0, 0.0]), (1, [1.1, 0.0]), (2, [1.2, 0.0]), (3, [1.3, 0.0]), (4, [1.4, 0.0]), (5, [0.0, 2.0]), (6, [0.0, 2.1]), (7, [0.0, 2.2]), (8, [0.0, 2.3]), (9, [0.0, 2.4]); - -CREATE TABLE tab_usearch(id Int32, vec Array(Float32), INDEX idx vec TYPE usearch()) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 8192; -INSERT INTO tab_usearch VALUES (0, [1.0, 0.0]), (1, [1.1, 0.0]), (2, [1.2, 0.0]), (3, [1.3, 0.0]), (4, [1.4, 0.0]), (5, [0.0, 2.0]), (6, [0.0, 2.1]), (7, [0.0, 2.2]), (8, [0.0, 2.3]), (9, [0.0, 2.4]); +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance')) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 8192; +INSERT INTO tab VALUES (0, [1.0, 0.0]), (1, [1.1, 0.0]), (2, [1.2, 0.0]), (3, [1.3, 0.0]), (4, [1.4, 0.0]), (5, [0.0, 2.0]), (6, [0.0, 2.1]), (7, [0.0, 2.2]), (8, [0.0, 2.3]), (9, [0.0, 2.4]); -SELECT '- Annoy: WHERE-type'; WITH [0.0, 2.0] AS reference_vec SELECT id, vec, L2Distance(vec, reference_vec) -FROM tab_annoy -WHERE L2Distance(vec, reference_vec) < 1.0 -LIMIT 3; - -SELECT '- Annoy: ORDER-BY-type'; -WITH [0.0, 2.0] AS reference_vec -SELECT id, vec, L2Distance(vec, reference_vec) -FROM tab_annoy +FROM tab ORDER BY L2Distance(vec, reference_vec) LIMIT 3; -SELECT '- Usearch: WHERE-type'; +EXPLAIN indexes = 1 WITH [0.0, 2.0] AS reference_vec SELECT id, vec, L2Distance(vec, reference_vec) -FROM tab_usearch -WHERE L2Distance(vec, reference_vec) < 1.0 -LIMIT 3; - -SELECT '- Usearch: ORDER-BY-type'; -WITH [0.0, 2.0] AS reference_vec -SELECT id, vec, L2Distance(vec, reference_vec) -FROM tab_usearch +FROM tab ORDER BY L2Distance(vec, reference_vec) LIMIT 3; -SELECT '- Annoy: WHERE-type, EXPLAIN'; -EXPLAIN indexes=1 -WITH [0.0, 2.0] AS reference_vec -SELECT id, vec, L2Distance(vec, reference_vec) -FROM tab_annoy -WHERE L2Distance(vec, reference_vec) < 1.0 -LIMIT 3; +DROP TABLE tab; + + +SELECT '12 rows, index_granularity = 3, GRANULARITY = 2 --> 4 granules, 2 indexed block'; + +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance') GRANULARITY 2) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 3; +INSERT INTO tab VALUES (0, [1.0, 0.0]), (1, [1.1, 0.0]), (2, [1.2, 0.0]), (3, [1.3, 0.0]), (4, [1.4, 0.0]), (5, [1.5, 0.0]), (6, [0.0, 2.0]), (7, [0.0, 2.1]), (8, [0.0, 2.2]), (9, [0.0, 2.3]), (10, [0.0, 2.4]), (11, [0.0, 2.5]); -SELECT '- Annoy: ORDER-BY-type, EXPLAIN'; -EXPLAIN indexes=1 WITH [0.0, 2.0] AS reference_vec SELECT id, vec, L2Distance(vec, reference_vec) -FROM tab_annoy +FROM tab ORDER BY L2Distance(vec, reference_vec) LIMIT 3; -SELECT '- Usearch: WHERE-type, EXPLAIN'; -EXPLAIN indexes=1 +EXPLAIN indexes = 1 WITH [0.0, 2.0] AS reference_vec SELECT id, vec, L2Distance(vec, reference_vec) -FROM tab_usearch -WHERE L2Distance(vec, reference_vec) < 1.0 -LIMIT 3; - -SELECT '- Usearch: ORDER-BY-type, EXPLAIN'; -EXPLAIN indexes=1 -WITH [0.0, 2.0] AS reference_vec -SELECT id, vec, L2Distance(vec, reference_vec) -FROM tab_usearch +FROM tab ORDER BY L2Distance(vec, reference_vec) LIMIT 3; -DROP TABLE tab_annoy; -DROP TABLE tab_usearch; +DROP TABLE tab; -SELECT 'ARRAY vectors, 12 rows, index_granularity = 3, GRANULARITY = 2 --> 4 granules, 2 indexed block'; +SELECT 'Special cases'; -- Not a systematic test, just to check that no bad things happen. -CREATE TABLE tab_annoy(id Int32, vec Array(Float32), INDEX idx vec TYPE annoy() GRANULARITY 2) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 3; -INSERT INTO tab_annoy VALUES (0, [1.0, 0.0]), (1, [1.1, 0.0]), (2, [1.2, 0.0]), (3, [1.3, 0.0]), (4, [1.4, 0.0]), (5, [1.5, 0.0]), (6, [0.0, 2.0]), (7, [0.0, 2.1]), (8, [0.0, 2.2]), (9, [0.0, 2.3]), (10, [0.0, 2.4]), (11, [0.0, 2.5]); +SELECT '-- Non-default metric, M, ef_construction, ef_search'; +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'cosineDistance', 'f32', 42, 99, 66) GRANULARITY 2) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 3; +INSERT INTO tab VALUES (0, [4.6, 2.3]), (1, [2.0, 3.2]), (2, [4.2, 3.4]), (3, [5.3, 2.9]), (4, [2.4, 5.2]), (5, [5.3, 2.3]), (6, [1.0, 9.3]), (7, [5.5, 4.7]), (8, [6.4, 3.5]), (9, [5.3, 2.5]), (10, [6.4, 3.4]), (11, [6.4, 3.2]); -CREATE TABLE tab_usearch(id Int32, vec Array(Float32), INDEX idx vec TYPE usearch() GRANULARITY 2) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 3; -INSERT INTO tab_usearch VALUES (0, [1.0, 0.0]), (1, [1.1, 0.0]), (2, [1.2, 0.0]), (3, [1.3, 0.0]), (4, [1.4, 0.0]), (5, [1.5, 0.0]), (6, [0.0, 2.0]), (7, [0.0, 2.1]), (8, [0.0, 2.2]), (9, [0.0, 2.3]), (10, [0.0, 2.4]), (11, [0.0, 2.5]); - -SELECT '- Annoy: WHERE-type'; WITH [0.0, 2.0] AS reference_vec -SELECT id, vec, L2Distance(vec, reference_vec) -FROM tab_annoy -WHERE L2Distance(vec, reference_vec) < 1.0 -LIMIT 3; - -SELECT '- Annoy: ORDER-BY-type'; -WITH [0.0, 2.0] AS reference_vec -SELECT id, vec, L2Distance(vec, reference_vec) -FROM tab_annoy -ORDER BY L2Distance(vec, reference_vec) -LIMIT 3; - -SELECT '- Usearch: WHERE-type'; -WITH [0.0, 2.0] AS reference_vec -SELECT id, vec, L2Distance(vec, reference_vec) -FROM tab_usearch -WHERE L2Distance(vec, reference_vec) < 1.0 -LIMIT 3; - -SELECT '- Usearch: ORDER-BY-type'; -WITH [0.0, 2.0] AS reference_vec -SELECT id, vec, L2Distance(vec, reference_vec) -FROM tab_usearch -ORDER BY L2Distance(vec, reference_vec) -LIMIT 3; - -SELECT '- Annoy: WHERE-type, EXPLAIN'; -EXPLAIN indexes=1 -WITH [0.0, 2.0] AS reference_vec -SELECT id, vec, L2Distance(vec, reference_vec) -FROM tab_annoy -WHERE L2Distance(vec, reference_vec) < 1.0 -LIMIT 3; - -SELECT '- Annoy: ORDER-BY-type, EXPLAIN'; -EXPLAIN indexes=1 -WITH [0.0, 2.0] AS reference_vec -SELECT id, vec, L2Distance(vec, reference_vec) -FROM tab_annoy -ORDER BY L2Distance(vec, reference_vec) -LIMIT 3; - -SELECT '- Usearch: WHERE-type, EXPLAIN'; -EXPLAIN indexes=1 -WITH [0.0, 2.0] AS reference_vec -SELECT id, vec, L2Distance(vec, reference_vec) -FROM tab_usearch -WHERE L2Distance(vec, reference_vec) < 1.0 -LIMIT 3; - -SELECT '- Usearch: ORDER-BY-type, EXPLAIN'; -EXPLAIN indexes=1 -WITH [0.0, 2.0] AS reference_vec -SELECT id, vec, L2Distance(vec, reference_vec) -FROM tab_usearch -ORDER BY L2Distance(vec, reference_vec) -LIMIT 3; - -DROP TABLE tab_annoy; -DROP TABLE tab_usearch; - - -SELECT 'TUPLE vectors and special cases'; --- Not a systematic test, just to check that no bad things happen. --- Just for jun, use metric = 'cosineDistance' (Annoy/Usearch), tree_count = 200 (Annoy), scalarKind = 'f64' (Usearch) - -CREATE TABLE tab_annoy(id Int32, vec Tuple(Float32, Float32), INDEX idx vec TYPE annoy('cosineDistance', 200) GRANULARITY 2) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 3; -INSERT INTO tab_annoy VALUES (0, (4.6, 2.3)), (1, (2.0, 3.2)), (2, (4.2, 3.4)), (3, (5.3, 2.9)), (4, (2.4, 5.2)), (5, (5.3, 2.3)), (6, (1.0, 9.3)), (7, (5.5, 4.7)), (8, (6.4, 3.5)), (9, (5.3, 2.5)), (10, (6.4, 3.4)), (11, (6.4, 3.2)); - -CREATE TABLE tab_usearch(id Int32, vec Tuple(Float32, Float32), INDEX idx vec TYPE usearch('cosineDistance', 'f64') GRANULARITY 2) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 3; -INSERT INTO tab_usearch VALUES (0, (4.6, 2.3)), (1, (2.0, 3.2)), (2, (4.2, 3.4)), (3, (5.3, 2.9)), (4, (2.4, 5.2)), (5, (5.3, 2.3)), (6, (1.0, 9.3)), (7, (5.5, 4.7)), (8, (6.4, 3.5)), (9, (5.3, 2.5)), (10, (6.4, 3.4)), (11, (6.4, 3.2)); - -SELECT '- Annoy: WHERE-type'; -WITH (0.0, 2.0) AS reference_vec SELECT id, vec, cosineDistance(vec, reference_vec) -FROM tab_annoy -WHERE cosineDistance(vec, reference_vec) < 1.0 -LIMIT 3; - -SELECT '- Annoy: ORDER-BY-type'; -WITH (0.0, 2.0) AS reference_vec -SELECT id, vec, cosineDistance(vec, reference_vec) -FROM tab_annoy +FROM tab ORDER BY cosineDistance(vec, reference_vec) LIMIT 3; -SELECT '- Usearch: WHERE-type'; -WITH (0.0, 2.0) AS reference_vec +SELECT '-- Setting "max_limit_for_ann_queries"'; +EXPLAIN indexes=1 +WITH [0.0, 2.0] as reference_vec SELECT id, vec, cosineDistance(vec, reference_vec) -FROM tab_usearch -WHERE cosineDistance(vec, reference_vec) < 1.0 -LIMIT 3; - -SELECT '- Usearch: ORDER-BY-type'; -WITH (0.0, 2.0) AS reference_vec -SELECT id, vec, cosineDistance(vec, reference_vec) -FROM tab_usearch -ORDER BY cosineDistance(vec, reference_vec) -LIMIT 3; - -SELECT '- Special case: MaximumDistance is negative'; -WITH (0.0, 2.0) as reference_vec -SELECT id, vec, cosineDistance(vec, reference_vec) -FROM tab_annoy -WHERE cosineDistance(vec, reference_vec) < -1.0 -LIMIT 3; -- { serverError INCORRECT_QUERY } - -SELECT '- Special case: MaximumDistance is negative'; -WITH (0.0, 2.0) as reference_vec -SELECT id, vec, cosineDistance(vec, reference_vec) -FROM tab_usearch -WHERE cosineDistance(vec, reference_vec) < -1.0 -LIMIT 3; -- { serverError INCORRECT_QUERY } - -SELECT '- Special case: setting "annoy_index_search_k_nodes"'; -WITH (0.0, 2.0) as reference_vec -SELECT id, vec, cosineDistance(vec, reference_vec) -FROM tab_annoy +FROM tab ORDER BY cosineDistance(vec, reference_vec) LIMIT 3 -SETTINGS annoy_index_search_k_nodes=0; -- searches zero nodes --> no results +SETTINGS max_limit_for_ann_queries = 2; -- LIMIT 3 > 2 --> don't use the ann index -SELECT '- Special case: setting "max_limit_for_ann_queries"'; -EXPLAIN indexes=1 -WITH (0.0, 2.0) as reference_vec -SELECT id, vec, cosineDistance(vec, reference_vec) -FROM tab_annoy -ORDER BY cosineDistance(vec, reference_vec) -LIMIT 3 -SETTINGS max_limit_for_ann_queries=2; -- LIMIT 3 > 2 --> don't use the ann index +DROP TABLE tab; -SELECT '- Special case: setting "max_limit_for_ann_queries"'; -EXPLAIN indexes=1 -WITH (0.0, 2.0) as reference_vec -SELECT id, vec, cosineDistance(vec, reference_vec) -FROM tab_usearch -ORDER BY cosineDistance(vec, reference_vec) -LIMIT 3 -SETTINGS max_limit_for_ann_queries=2; -- LIMIT 3 > 2 --> don't use the ann index +SELECT '-- Non-default quantization'; +CREATE TABLE tab_f32(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance', 'f32', 0, 0, 0) GRANULARITY 2) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 3; +CREATE TABLE tab_f16(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance', 'f16', 0, 0, 0) GRANULARITY 2) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 3; +CREATE TABLE tab_i8(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance', 'i8', 0, 0, 0) GRANULARITY 2) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 3; +INSERT INTO tab_f32 VALUES (0, [4.6, 2.3]), (1, [2.0, 3.2]), (2, [4.2, 3.4]), (3, [5.3, 2.9]), (4, [2.4, 5.2]), (5, [5.3, 2.3]), (6, [1.0, 9.3]), (7, [5.5, 4.7]), (8, [6.4, 3.5]), (9, [5.3, 2.5]), (10, [6.4, 3.4]), (11, [6.4, 3.2]); +INSERT INTO tab_f16 VALUES (0, [4.6, 2.3]), (1, [2.0, 3.2]), (2, [4.2, 3.4]), (3, [5.3, 2.9]), (4, [2.4, 5.2]), (5, [5.3, 2.3]), (6, [1.0, 9.3]), (7, [5.5, 4.7]), (8, [6.4, 3.5]), (9, [5.3, 2.5]), (10, [6.4, 3.4]), (11, [6.4, 3.2]); +INSERT INTO tab_i8 VALUES (0, [4.6, 2.3]), (1, [2.0, 3.2]), (2, [4.2, 3.4]), (3, [5.3, 2.9]), (4, [2.4, 5.2]), (5, [5.3, 2.3]), (6, [1.0, 9.3]), (7, [5.5, 4.7]), (8, [6.4, 3.5]), (9, [5.3, 2.5]), (10, [6.4, 3.4]), (11, [6.4, 3.2]); -DROP TABLE tab_annoy; -DROP TABLE tab_usearch; +WITH [0.0, 2.0] AS reference_vec +SELECT id, vec, L2Distance(vec, reference_vec) +FROM tab_f32 +ORDER BY L2Distance(vec, reference_vec) +LIMIT 3; + +EXPLAIN indexes = 1 +WITH [0.0, 2.0] AS reference_vec +SELECT id, vec, L2Distance(vec, reference_vec) +FROM tab_f32 +ORDER BY L2Distance(vec, reference_vec) +LIMIT 3; + +WITH [0.0, 2.0] AS reference_vec +SELECT id, vec, L2Distance(vec, reference_vec) +FROM tab_f16 +ORDER BY L2Distance(vec, reference_vec) +LIMIT 3; + +EXPLAIN indexes = 1 +WITH [0.0, 2.0] AS reference_vec +SELECT id, vec, L2Distance(vec, reference_vec) +FROM tab_f16 +ORDER BY L2Distance(vec, reference_vec) +LIMIT 3; + +WITH [0.0, 2.0] AS reference_vec +SELECT id, vec, L2Distance(vec, reference_vec) +FROM tab_i8 +ORDER BY L2Distance(vec, reference_vec) +LIMIT 3; + +EXPLAIN indexes = 1 +WITH [0.0, 2.0] AS reference_vec +SELECT id, vec, L2Distance(vec, reference_vec) +FROM tab_i8 +ORDER BY L2Distance(vec, reference_vec) +LIMIT 3; + +DROP TABLE tab_f32; +DROP TABLE tab_f16; +DROP TABLE tab_i8; + +SELECT '-- Index on Array(Float64) column'; +CREATE TABLE tab(id Int32, vec Array(Float64), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance') GRANULARITY 2) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 3; +INSERT INTO tab VALUES (0, [1.0, 0.0]), (1, [1.1, 0.0]), (2, [1.2, 0.0]), (3, [1.3, 0.0]), (4, [1.4, 0.0]), (5, [1.5, 0.0]), (6, [0.0, 2.0]), (7, [0.0, 2.1]), (8, [0.0, 2.2]), (9, [0.0, 2.3]), (10, [0.0, 2.4]), (11, [0.0, 2.5]); + +WITH [0.0, 2.0] AS reference_vec +SELECT id, vec, L2Distance(vec, reference_vec) +FROM tab +ORDER BY L2Distance(vec, reference_vec) +LIMIT 3; + +DROP TABLE tab; diff --git a/tests/queries/0_stateless/02354_vector_search_unquoted_index_parameters.reference b/tests/queries/0_stateless/02354_vector_search_unquoted_index_parameters.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02354_vector_search_unquoted_index_parameters.sql b/tests/queries/0_stateless/02354_vector_search_unquoted_index_parameters.sql new file mode 100644 index 00000000000..da6494bf831 --- /dev/null +++ b/tests/queries/0_stateless/02354_vector_search_unquoted_index_parameters.sql @@ -0,0 +1,23 @@ +-- Tags: no-fasttest, no-ordinary-database + +SET allow_experimental_vector_similarity_index = 1; + +-- Tests that quoted and unquoted parameters can be passed to vector search indexes. + +DROP TABLE IF EXISTS tab1; +DROP TABLE IF EXISTS tab2; + +CREATE TABLE tab1 (id Int32, vec Array(Float32), PRIMARY KEY id, INDEX vec_idx(vec) TYPE vector_similarity('hnsw', 'L2Distance')); +CREATE TABLE tab2 (id Int32, vec Array(Float32), PRIMARY KEY id, INDEX vec_idx(vec) TYPE vector_similarity(hnsw, L2Distance)); + +DROP TABLE tab1; +DROP TABLE tab2; + +CREATE TABLE tab1 (id Int32, vec Array(Float32), PRIMARY KEY id); +CREATE TABLE tab2 (id Int32, vec Array(Float32), PRIMARY KEY id); + +ALTER TABLE tab1 ADD INDEX idx1(vec) TYPE vector_similarity('hnsw', 'L2Distance'); +ALTER TABLE tab2 ADD INDEX idx2(vec) TYPE vector_similarity(hnsw, L2Distance); + +DROP TABLE tab1; +DROP TABLE tab2; diff --git a/tests/queries/0_stateless/02355_column_type_name_lc.reference b/tests/queries/0_stateless/02355_column_type_name_lc.reference index 234a072299f..50c25a86b2f 100644 --- a/tests/queries/0_stateless/02355_column_type_name_lc.reference +++ b/tests/queries/0_stateless/02355_column_type_name_lc.reference @@ -1 +1 @@ -ColumnLowCardinality +LowCardinality(String) diff --git a/tests/queries/0_stateless/02361_fsync_profile_events.sh b/tests/queries/0_stateless/02361_fsync_profile_events.sh index 98c9cf9b7b4..73bf3fa120a 100755 --- a/tests/queries/0_stateless/02361_fsync_profile_events.sh +++ b/tests/queries/0_stateless/02361_fsync_profile_events.sh @@ -6,7 +6,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " drop table if exists data_fsync_pe; create table data_fsync_pe (key Int) engine=MergeTree() @@ -27,7 +27,7 @@ for i in {1..100}; do $CLICKHOUSE_CLIENT --query_id "$query_id" -q "insert into data_fsync_pe values (1)" read -r FileSync FileOpen DirectorySync FileSyncElapsedMicroseconds DirectorySyncElapsedMicroseconds <<<"$( - $CLICKHOUSE_CLIENT -nm --param_query_id "$query_id" -q " + $CLICKHOUSE_CLIENT -m --param_query_id "$query_id" -q " system flush logs; select diff --git a/tests/queries/0_stateless/02372_now_in_block.sql b/tests/queries/0_stateless/02372_now_in_block.sql index aee4572ce8d..d0aec471801 100644 --- a/tests/queries/0_stateless/02372_now_in_block.sql +++ b/tests/queries/0_stateless/02372_now_in_block.sql @@ -1,3 +1,4 @@ +SET max_rows_to_read = 0, max_bytes_to_read = 0; SELECT count() FROM (SELECT DISTINCT nowInBlock(), nowInBlock('Pacific/Pitcairn') FROM system.numbers LIMIT 2); SELECT nowInBlock(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT nowInBlock(NULL) IS NULL; diff --git a/tests/queries/0_stateless/02377_extend_protocol_with_query_parameters.sh b/tests/queries/0_stateless/02377_extend_protocol_with_query_parameters.sh index 71e3b6961f8..46396d38747 100755 --- a/tests/queries/0_stateless/02377_extend_protocol_with_query_parameters.sh +++ b/tests/queries/0_stateless/02377_extend_protocol_with_query_parameters.sh @@ -24,7 +24,7 @@ $CLICKHOUSE_CLIENT \ table_name="t_02377_extend_protocol_with_query_parameters_$RANDOM$RANDOM" -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " create table $table_name( id Int64, arr Array(UInt8), @@ -57,17 +57,17 @@ $CLICKHOUSE_CLIENT \ # it is possible to set parameter for the current session -$CLICKHOUSE_CLIENT -n -q "set param_n = 42; select {n: UInt8}" +$CLICKHOUSE_CLIENT -q "set param_n = 42; select {n: UInt8}" # and it will not be visible to other sessions -$CLICKHOUSE_CLIENT -n -q "select {n: UInt8} -- { serverError 456 }" +$CLICKHOUSE_CLIENT -q "select {n: UInt8} -- { serverError 456 }" # the same parameter could be set multiple times within one session (new value overrides the previous one) -$CLICKHOUSE_CLIENT -n -q "set param_n = 12; set param_n = 13; select {n: UInt8}" +$CLICKHOUSE_CLIENT -q "set param_n = 12; set param_n = 13; select {n: UInt8}" # multiple different parameters could be defined within each session -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " set param_a = 13, param_b = 'str'; set param_c = '2022-08-04 18:30:53'; set param_d = '{\'10\': [11, 12], \'13\': [14, 15]}'; diff --git a/tests/queries/0_stateless/02377_optimize_sorting_by_input_stream_properties_explain.sh b/tests/queries/0_stateless/02377_optimize_sorting_by_input_stream_properties_explain.sh index 4b9793da5bb..52f48dcbb91 100755 --- a/tests/queries/0_stateless/02377_optimize_sorting_by_input_stream_properties_explain.sh +++ b/tests/queries/0_stateless/02377_optimize_sorting_by_input_stream_properties_explain.sh @@ -15,14 +15,14 @@ FIND_SORTMODE="$GREP_SORTMODE | $TRIM_LEADING_SPACES" function explain_sorting { echo "-- QUERY: "$1 - $CLICKHOUSE_CLIENT --merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=0.0 -nq "$1" | eval $FIND_SORTING + $CLICKHOUSE_CLIENT --merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=0.0 -q "$1" | eval $FIND_SORTING } function explain_sortmode { echo "-- QUERY: "$1 - $CLICKHOUSE_CLIENT --enable_analyzer=0 --merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=0.0 -nq "$1" | eval $FIND_SORTMODE + $CLICKHOUSE_CLIENT --enable_analyzer=0 --merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=0.0 -q "$1" | eval $FIND_SORTMODE echo "-- QUERY (analyzer): "$1 - $CLICKHOUSE_CLIENT --enable_analyzer=1 --merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=0.0 -nq "$1" | eval $FIND_SORTMODE + $CLICKHOUSE_CLIENT --enable_analyzer=1 --merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=0.0 -q "$1" | eval $FIND_SORTMODE } $CLICKHOUSE_CLIENT -q "drop table if exists optimize_sorting sync" diff --git a/tests/queries/0_stateless/02378_part_log_profile_events.sql b/tests/queries/0_stateless/02378_part_log_profile_events.sql index 38d3f8b4c05..eec76d6f50e 100644 --- a/tests/queries/0_stateless/02378_part_log_profile_events.sql +++ b/tests/queries/0_stateless/02378_part_log_profile_events.sql @@ -39,7 +39,7 @@ SYSTEM FLUSH LOGS; SELECT if(count() == 2, 'Ok', 'Error: ' || toString(count())), - if(SUM(ProfileEvents['MergedRows']) == 512, 'Ok', 'Error: ' || toString(SUM(ProfileEvents['MergedRows']))), + if(SUM(ProfileEvents['MutatedRows']) == 512, 'Ok', 'Error: ' || toString(SUM(ProfileEvents['MutatedRows']))), if(SUM(ProfileEvents['FileOpen']) > 1, 'Ok', 'Error: ' || toString(SUM(ProfileEvents['FileOpen']))) FROM system.part_log WHERE event_time > now() - INTERVAL 10 MINUTE diff --git a/tests/queries/0_stateless/02383_join_and_filtering_set.sh b/tests/queries/0_stateless/02383_join_and_filtering_set.sh index 3a6d60811c9..a3f12381c97 100755 --- a/tests/queries/0_stateless/02383_join_and_filtering_set.sh +++ b/tests/queries/0_stateless/02383_join_and_filtering_set.sh @@ -9,7 +9,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -mn -q """ +$CLICKHOUSE_CLIENT -m -q """ DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS t2; diff --git a/tests/queries/0_stateless/02404_data.CSV b/tests/queries/0_stateless/02404_data.CSV new file mode 100644 index 00000000000..2d8b5c8daa8 --- /dev/null +++ b/tests/queries/0_stateless/02404_data.CSV @@ -0,0 +1,10 @@ +0,"1970-01-01" +1,"1970-01-02" +2,"1970-01-03" +3,"1970-01-04" +4,"1970-01-05" +5,"1970-01-06" +6,"1970-01-07" +7,"1970-01-08" +8,"1970-01-09" +9,"1970-01-10" diff --git a/tests/queries/0_stateless/02404_data.CSVWithNames b/tests/queries/0_stateless/02404_data.CSVWithNames new file mode 100644 index 00000000000..34647008916 --- /dev/null +++ b/tests/queries/0_stateless/02404_data.CSVWithNames @@ -0,0 +1,11 @@ +"number","toDate(number)" +0,"1970-01-01" +1,"1970-01-02" +2,"1970-01-03" +3,"1970-01-04" +4,"1970-01-05" +5,"1970-01-06" +6,"1970-01-07" +7,"1970-01-08" +8,"1970-01-09" +9,"1970-01-10" diff --git a/tests/queries/0_stateless/02404_data.CustomSeparated b/tests/queries/0_stateless/02404_data.CustomSeparated new file mode 100644 index 00000000000..f3ae1663536 --- /dev/null +++ b/tests/queries/0_stateless/02404_data.CustomSeparated @@ -0,0 +1,10 @@ +0 1970-01-01 +1 1970-01-02 +2 1970-01-03 +3 1970-01-04 +4 1970-01-05 +5 1970-01-06 +6 1970-01-07 +7 1970-01-08 +8 1970-01-09 +9 1970-01-10 diff --git a/tests/queries/0_stateless/02404_data.JSONCompactEachRow b/tests/queries/0_stateless/02404_data.JSONCompactEachRow new file mode 100644 index 00000000000..de2e0986aab --- /dev/null +++ b/tests/queries/0_stateless/02404_data.JSONCompactEachRow @@ -0,0 +1,10 @@ +["0", "1970-01-01"] +["1", "1970-01-02"] +["2", "1970-01-03"] +["3", "1970-01-04"] +["4", "1970-01-05"] +["5", "1970-01-06"] +["6", "1970-01-07"] +["7", "1970-01-08"] +["8", "1970-01-09"] +["9", "1970-01-10"] diff --git a/tests/queries/0_stateless/02404_data.JSONEachRow b/tests/queries/0_stateless/02404_data.JSONEachRow new file mode 100644 index 00000000000..e77256ac7fc --- /dev/null +++ b/tests/queries/0_stateless/02404_data.JSONEachRow @@ -0,0 +1,10 @@ +{"number":"0","toDate(number)":"1970-01-01"} +{"number":"1","toDate(number)":"1970-01-02"} +{"number":"2","toDate(number)":"1970-01-03"} +{"number":"3","toDate(number)":"1970-01-04"} +{"number":"4","toDate(number)":"1970-01-05"} +{"number":"5","toDate(number)":"1970-01-06"} +{"number":"6","toDate(number)":"1970-01-07"} +{"number":"7","toDate(number)":"1970-01-08"} +{"number":"8","toDate(number)":"1970-01-09"} +{"number":"9","toDate(number)":"1970-01-10"} diff --git a/tests/queries/0_stateless/02404_data.TSKV b/tests/queries/0_stateless/02404_data.TSKV new file mode 100644 index 00000000000..70f7ad33c8b --- /dev/null +++ b/tests/queries/0_stateless/02404_data.TSKV @@ -0,0 +1,10 @@ +number=0 toDate(number)=1970-01-01 +number=1 toDate(number)=1970-01-02 +number=2 toDate(number)=1970-01-03 +number=3 toDate(number)=1970-01-04 +number=4 toDate(number)=1970-01-05 +number=5 toDate(number)=1970-01-06 +number=6 toDate(number)=1970-01-07 +number=7 toDate(number)=1970-01-08 +number=8 toDate(number)=1970-01-09 +number=9 toDate(number)=1970-01-10 diff --git a/tests/queries/0_stateless/02404_data.TSV b/tests/queries/0_stateless/02404_data.TSV new file mode 100644 index 00000000000..f3ae1663536 --- /dev/null +++ b/tests/queries/0_stateless/02404_data.TSV @@ -0,0 +1,10 @@ +0 1970-01-01 +1 1970-01-02 +2 1970-01-03 +3 1970-01-04 +4 1970-01-05 +5 1970-01-06 +6 1970-01-07 +7 1970-01-08 +8 1970-01-09 +9 1970-01-10 diff --git a/tests/queries/0_stateless/02404_data.TSVWithNames b/tests/queries/0_stateless/02404_data.TSVWithNames new file mode 100644 index 00000000000..23310234a8c --- /dev/null +++ b/tests/queries/0_stateless/02404_data.TSVWithNames @@ -0,0 +1,11 @@ +number toDate(number) +0 1970-01-01 +1 1970-01-02 +2 1970-01-03 +3 1970-01-04 +4 1970-01-05 +5 1970-01-06 +6 1970-01-07 +7 1970-01-08 +8 1970-01-09 +9 1970-01-10 diff --git a/tests/queries/0_stateless/02404_data.Values b/tests/queries/0_stateless/02404_data.Values new file mode 100644 index 00000000000..d9a621d7ec9 --- /dev/null +++ b/tests/queries/0_stateless/02404_data.Values @@ -0,0 +1 @@ +(0,'1970-01-01'),(1,'1970-01-02'),(2,'1970-01-03'),(3,'1970-01-04'),(4,'1970-01-05'),(5,'1970-01-06'),(6,'1970-01-07'),(7,'1970-01-08'),(8,'1970-01-09'),(9,'1970-01-10') \ No newline at end of file diff --git a/tests/queries/0_stateless/02404_schema_inference_cache_respect_format_settings.reference b/tests/queries/0_stateless/02404_schema_inference_cache_respect_format_settings.reference index 049603328d9..3d6b1021916 100644 --- a/tests/queries/0_stateless/02404_schema_inference_cache_respect_format_settings.reference +++ b/tests/queries/0_stateless/02404_schema_inference_cache_respect_format_settings.reference @@ -4,7 +4,7 @@ c2 Nullable(Date) c1 Nullable(Float64) c2 Nullable(Date) c1 Nullable(Int64) -c2 Nullable(DateTime64(9)) +c2 Nullable(DateTime) c1 UInt8 c2 Nullable(Date) 4 @@ -14,7 +14,7 @@ toDate(number) Nullable(Date) number Nullable(Float64) toDate(number) Nullable(Date) number Nullable(Int64) -toDate(number) Nullable(DateTime64(9)) +toDate(number) Nullable(DateTime) number Nullable(Int64) toDate(number) Nullable(Date) 4 @@ -24,7 +24,7 @@ c2 Nullable(Date) c1 Nullable(Float64) c2 Nullable(Date) c1 Nullable(Int64) -c2 Nullable(DateTime64(9)) +c2 Nullable(DateTime) c1 UInt8 c2 Nullable(Date) 4 @@ -34,7 +34,7 @@ toDate(number) Nullable(Date) number Nullable(Float64) toDate(number) Nullable(Date) number Nullable(Int64) -toDate(number) Nullable(DateTime64(9)) +toDate(number) Nullable(DateTime) number Nullable(Int64) toDate(number) Nullable(Date) 4 @@ -44,7 +44,7 @@ toDate(number) Nullable(Date) number Nullable(Float64) toDate(number) Nullable(Date) number Nullable(Int64) -toDate(number) Nullable(DateTime64(9)) +toDate(number) Nullable(DateTime) number Nullable(Int64) toDate(number) Nullable(Date) 4 @@ -54,7 +54,7 @@ c2 Nullable(Date) c1 Nullable(Float64) c2 Nullable(Date) c1 Nullable(Int64) -c2 Nullable(DateTime64(9)) +c2 Nullable(DateTime) c1 UInt8 c2 Nullable(Date) 4 @@ -64,7 +64,7 @@ toDate(number) Nullable(Date) number Nullable(Float64) toDate(number) Nullable(Date) number Nullable(Int64) -toDate(number) Nullable(DateTime64(9)) +toDate(number) Nullable(DateTime) number Nullable(Int64) toDate(number) Nullable(Date) 4 @@ -74,7 +74,7 @@ c2 Nullable(Date) c1 Nullable(Float64) c2 Nullable(Date) c1 Nullable(Int64) -c2 Nullable(DateTime64(9)) +c2 Nullable(DateTime) c1 UInt8 c2 Nullable(Date) 4 @@ -84,7 +84,7 @@ c2 Nullable(Date) c1 Nullable(Float64) c2 Nullable(Date) c1 Nullable(Int64) -c2 Nullable(DateTime64(9)) +c2 Nullable(DateTime) c1 UInt8 c2 Nullable(Date) 4 diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index c39f1fb1ce9..0980e25b70f 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -416,7 +416,6 @@ logTrace lowCardinalityIndices lowCardinalityKeys lower -lowerUTF8 makeDate makeDate32 makeDateTime @@ -897,7 +896,6 @@ tupleToNameValuePairs unbin unhex upper -upperUTF8 uptime validateNestedArraySizes version diff --git a/tests/queries/0_stateless/02417_load_marks_async.sh b/tests/queries/0_stateless/02417_load_marks_async.sh index 950656e7ab6..bcede9e4f5e 100755 --- a/tests/queries/0_stateless/02417_load_marks_async.sh +++ b/tests/queries/0_stateless/02417_load_marks_async.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS test;" -${CLICKHOUSE_CLIENT} -n -q " +${CLICKHOUSE_CLIENT} -q " CREATE TABLE test ( n0 UInt64, diff --git a/tests/queries/0_stateless/02421_new_type_json_async_insert.reference b/tests/queries/0_stateless/02421_new_type_json_async_insert.reference new file mode 100644 index 00000000000..fdd133460c6 --- /dev/null +++ b/tests/queries/0_stateless/02421_new_type_json_async_insert.reference @@ -0,0 +1,5 @@ +INCORRECT_DATA +0 +0 +INCORRECT_DATA +aaa diff --git a/tests/queries/0_stateless/02421_new_type_json_async_insert.sh b/tests/queries/0_stateless/02421_new_type_json_async_insert.sh new file mode 100755 index 00000000000..b23470a4179 --- /dev/null +++ b/tests/queries/0_stateless/02421_new_type_json_async_insert.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t_json_async_insert" +$CLICKHOUSE_CLIENT --allow_experimental_json_type=1 -q "CREATE TABLE t_json_async_insert (data JSON) ENGINE = MergeTree ORDER BY tuple()" + +$CLICKHOUSE_CLIENT --async_insert=1 --wait_for_async_insert=1 -q 'INSERT INTO t_json_async_insert FORMAT JSONAsObject {"aaa"}' 2>&1 | grep -o -m1 "INCORRECT_DATA" +$CLICKHOUSE_CLIENT -q "SELECT count() FROM t_json_async_insert" +$CLICKHOUSE_CLIENT -q "SELECT count() FROM system.parts WHERE database = '$CLICKHOUSE_DATABASE' AND table = 't_json_async_insert'" + +$CLICKHOUSE_CLIENT --async_insert=1 --wait_for_async_insert=1 -q 'INSERT INTO t_json_async_insert FORMAT JSONAsObject {"aaa"}' 2>&1 | grep -o -m1 "INCORRECT_DATA" & +$CLICKHOUSE_CLIENT --async_insert=1 --wait_for_async_insert=1 -q 'INSERT INTO t_json_async_insert FORMAT JSONAsObject {"k1": "aaa"}' & + +wait + +$CLICKHOUSE_CLIENT -q "SELECT data.k1 FROM t_json_async_insert ORDER BY data.k1" +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t_json_async_insert" diff --git a/tests/queries/0_stateless/02421_new_type_json_empty_parts.reference b/tests/queries/0_stateless/02421_new_type_json_empty_parts.reference new file mode 100644 index 00000000000..172ea2d3eed --- /dev/null +++ b/tests/queries/0_stateless/02421_new_type_json_empty_parts.reference @@ -0,0 +1,16 @@ +Collapsing +0 +0 +DELETE all +2 +1 +('k1','String') +('k2','String') +0 +0 +TTL +1 +1 +('k2','String') +0 +0 diff --git a/tests/queries/0_stateless/02421_new_type_json_empty_parts.sh b/tests/queries/0_stateless/02421_new_type_json_empty_parts.sh new file mode 100755 index 00000000000..2714b9586f8 --- /dev/null +++ b/tests/queries/0_stateless/02421_new_type_json_empty_parts.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +set -euo pipefail + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh +# shellcheck source=./parts.lib +. "$CURDIR"/parts.lib + +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_json_empty_parts;" +${CLICKHOUSE_CLIENT} -q "SELECT 'Collapsing';" +${CLICKHOUSE_CLIENT} -q "CREATE TABLE t_json_empty_parts (id UInt64, s Int8, data JSON) ENGINE = CollapsingMergeTree(s) ORDER BY id SETTINGS old_parts_lifetime=5;" --allow_experimental_json_type 1 +${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_empty_parts VALUES (1, 1, '{\"k1\": \"aaa\"}') (1, -1, '{\"k2\": \"bbb\"}');" +${CLICKHOUSE_CLIENT} -q "SELECT count() FROM t_json_empty_parts;" +${CLICKHOUSE_CLIENT} -q "SELECT count() FROM system.parts WHERE table = 't_json_empty_parts' AND database = currentDatabase() AND active;" +${CLICKHOUSE_CLIENT} -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(data)) AS path FROM t_json_empty_parts ORDER BY path" + +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_json_empty_parts;" +${CLICKHOUSE_CLIENT} -q "SELECT 'DELETE all';" +${CLICKHOUSE_CLIENT} -q "CREATE TABLE t_json_empty_parts (id UInt64, data JSON) ENGINE = MergeTree ORDER BY id SETTINGS old_parts_lifetime=5;" --allow_experimental_json_type 1 +${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_empty_parts VALUES (1, '{\"k1\": \"aaa\"}') (1, '{\"k2\": \"bbb\"}');" +${CLICKHOUSE_CLIENT} -q "SELECT count() FROM t_json_empty_parts;" +${CLICKHOUSE_CLIENT} -q "SELECT count() FROM system.parts WHERE table = 't_json_empty_parts' AND database = currentDatabase() AND active;" +${CLICKHOUSE_CLIENT} -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(data)) AS path FROM t_json_empty_parts ORDER BY path" +${CLICKHOUSE_CLIENT} -q "ALTER TABLE t_json_empty_parts DELETE WHERE 1 SETTINGS mutations_sync = 1;" +timeout 60 bash -c 'wait_for_delete_empty_parts t_json_empty_parts' +${CLICKHOUSE_CLIENT} -q "SELECT count() FROM t_json_empty_parts;" +${CLICKHOUSE_CLIENT} -q "SELECT count() FROM system.parts WHERE table = 't_json_empty_parts' AND database = currentDatabase() AND active;" +${CLICKHOUSE_CLIENT} -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(data)) AS path FROM t_json_empty_parts ORDER BY path" + +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_json_empty_parts;" +${CLICKHOUSE_CLIENT} -q "SELECT 'TTL';" +${CLICKHOUSE_CLIENT} -q "CREATE TABLE t_json_empty_parts (id UInt64, d Date, data JSON) ENGINE = MergeTree ORDER BY id TTL d WHERE id % 2 = 1 SETTINGS old_parts_lifetime=5;" --allow_experimental_json_type 1 +${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_empty_parts VALUES (1, '2000-01-01', '{\"k1\": \"aaa\"}') (2, '2000-01-01', '{\"k2\": \"bbb\"}');" +${CLICKHOUSE_CLIENT} -q "OPTIMIZE TABLE t_json_empty_parts FINAL;" +${CLICKHOUSE_CLIENT} -q "SELECT count() FROM t_json_empty_parts;" +${CLICKHOUSE_CLIENT} -q "SELECT count() FROM system.parts WHERE table = 't_json_empty_parts' AND database = currentDatabase() AND active;" +${CLICKHOUSE_CLIENT} -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(data)) AS path FROM t_json_empty_parts ORDER BY path" +${CLICKHOUSE_CLIENT} -q "ALTER TABLE t_json_empty_parts MODIFY TTL d;" +${CLICKHOUSE_CLIENT} -q "OPTIMIZE TABLE t_json_empty_parts FINAL;" +timeout 60 bash -c 'wait_for_delete_empty_parts t_json_empty_parts' +${CLICKHOUSE_CLIENT} -q "SELECT count() FROM t_json_empty_parts;" +${CLICKHOUSE_CLIENT} -q "SELECT count() FROM system.parts WHERE table = 't_json_empty_parts' AND database = currentDatabase() AND active;" +${CLICKHOUSE_CLIENT} -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(data)) AS path FROM t_json_empty_parts ORDER BY path" + +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_json_empty_parts;" diff --git a/tests/queries/0_stateless/02421_simple_queries_for_opentelemetry.sh b/tests/queries/0_stateless/02421_simple_queries_for_opentelemetry.sh index 98b571c5968..91e85eabcb8 100755 --- a/tests/queries/0_stateless/02421_simple_queries_for_opentelemetry.sh +++ b/tests/queries/0_stateless/02421_simple_queries_for_opentelemetry.sh @@ -9,7 +9,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # $2 - query function execute_query() { - ${CLICKHOUSE_CLIENT} --opentelemetry_start_trace_probability=1 --query_id $1 -nq " + ${CLICKHOUSE_CLIENT} --opentelemetry_start_trace_probability=1 --query_id $1 -q " ${2} " } @@ -18,7 +18,7 @@ function execute_query() # so we only to check the db.statement only function check_query_span_query_only() { -${CLICKHOUSE_CLIENT} -nq " +${CLICKHOUSE_CLIENT} -q " SYSTEM FLUSH LOGS; SELECT attribute['db.statement'] as query FROM system.opentelemetry_span_log @@ -31,7 +31,7 @@ ${CLICKHOUSE_CLIENT} -nq " function check_query_span() { -${CLICKHOUSE_CLIENT} -nq " +${CLICKHOUSE_CLIENT} -q " SYSTEM FLUSH LOGS; SELECT attribute['db.statement'] as query, attribute['clickhouse.read_rows'] as read_rows, @@ -47,7 +47,7 @@ ${CLICKHOUSE_CLIENT} -nq " # # Set up # -${CLICKHOUSE_CLIENT} -nq " +${CLICKHOUSE_CLIENT} -q " DROP TABLE IF EXISTS ${CLICKHOUSE_DATABASE}.opentelemetry_test; CREATE TABLE ${CLICKHOUSE_DATABASE}.opentelemetry_test (id UInt64) Engine=MergeTree Order By id; " @@ -79,4 +79,4 @@ check_query_span $query_id # ${CLICKHOUSE_CLIENT} -q " DROP TABLE IF EXISTS ${CLICKHOUSE_DATABASE}.opentelemetry_test; -" \ No newline at end of file +" diff --git a/tests/queries/0_stateless/02421_type_json_async_insert.sh b/tests/queries/0_stateless/02421_type_json_async_insert.sh index 8aa0d510dbb..73d66d116ce 100755 --- a/tests/queries/0_stateless/02421_type_json_async_insert.sh +++ b/tests/queries/0_stateless/02421_type_json_async_insert.sh @@ -6,7 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t_json_async_insert" -$CLICKHOUSE_CLIENT --allow_experimental_object_type=1 -q "CREATE TABLE t_json_async_insert (data JSON) ENGINE = MergeTree ORDER BY tuple()" +$CLICKHOUSE_CLIENT --allow_experimental_object_type=1 -q "CREATE TABLE t_json_async_insert (data Object('json')) ENGINE = MergeTree ORDER BY tuple()" $CLICKHOUSE_CLIENT --async_insert=1 --wait_for_async_insert=1 -q 'INSERT INTO t_json_async_insert FORMAT JSONAsObject {"aaa"}' 2>&1 | grep -o -m1 "Cannot parse object" $CLICKHOUSE_CLIENT -q "SELECT count() FROM t_json_async_insert" diff --git a/tests/queries/0_stateless/02421_type_json_empty_parts.sh b/tests/queries/0_stateless/02421_type_json_empty_parts.sh index b6cf5995bfa..2ecec524e25 100755 --- a/tests/queries/0_stateless/02421_type_json_empty_parts.sh +++ b/tests/queries/0_stateless/02421_type_json_empty_parts.sh @@ -11,7 +11,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_json_empty_parts;" ${CLICKHOUSE_CLIENT} -q "SELECT 'Collapsing';" -${CLICKHOUSE_CLIENT} -q "CREATE TABLE t_json_empty_parts (id UInt64, s Int8, data JSON) ENGINE = CollapsingMergeTree(s) ORDER BY id SETTINGS old_parts_lifetime=5;" --allow_experimental_object_type 1 +${CLICKHOUSE_CLIENT} -q "CREATE TABLE t_json_empty_parts (id UInt64, s Int8, data Object('json')) ENGINE = CollapsingMergeTree(s) ORDER BY id SETTINGS old_parts_lifetime=5;" --allow_experimental_object_type 1 ${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_empty_parts VALUES (1, 1, '{\"k1\": \"aaa\"}') (1, -1, '{\"k2\": \"bbb\"}');" ${CLICKHOUSE_CLIENT} -q "SELECT count() FROM t_json_empty_parts;" ${CLICKHOUSE_CLIENT} -q "SELECT count() FROM system.parts WHERE table = 't_json_empty_parts' AND database = currentDatabase() AND active;" @@ -19,7 +19,7 @@ ${CLICKHOUSE_CLIENT} -q "DESC TABLE t_json_empty_parts SETTINGS describe_extend_ ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_json_empty_parts;" ${CLICKHOUSE_CLIENT} -q "SELECT 'DELETE all';" -${CLICKHOUSE_CLIENT} -q "CREATE TABLE t_json_empty_parts (id UInt64, data JSON) ENGINE = MergeTree ORDER BY id SETTINGS old_parts_lifetime=5;" --allow_experimental_object_type 1 +${CLICKHOUSE_CLIENT} -q "CREATE TABLE t_json_empty_parts (id UInt64, data Object('json')) ENGINE = MergeTree ORDER BY id SETTINGS old_parts_lifetime=5;" --allow_experimental_object_type 1 ${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_empty_parts VALUES (1, '{\"k1\": \"aaa\"}') (1, '{\"k2\": \"bbb\"}');" ${CLICKHOUSE_CLIENT} -q "SELECT count() FROM t_json_empty_parts;" ${CLICKHOUSE_CLIENT} -q "SELECT count() FROM system.parts WHERE table = 't_json_empty_parts' AND database = currentDatabase() AND active;" @@ -32,7 +32,7 @@ ${CLICKHOUSE_CLIENT} -q "DESC TABLE t_json_empty_parts SETTINGS describe_extend_ ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_json_empty_parts;" ${CLICKHOUSE_CLIENT} -q "SELECT 'TTL';" -${CLICKHOUSE_CLIENT} -q "CREATE TABLE t_json_empty_parts (id UInt64, d Date, data JSON) ENGINE = MergeTree ORDER BY id TTL d WHERE id % 2 = 1 SETTINGS old_parts_lifetime=5;" --allow_experimental_object_type 1 +${CLICKHOUSE_CLIENT} -q "CREATE TABLE t_json_empty_parts (id UInt64, d Date, data Object('json')) ENGINE = MergeTree ORDER BY id TTL d WHERE id % 2 = 1 SETTINGS old_parts_lifetime=5;" --allow_experimental_object_type 1 ${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_empty_parts VALUES (1, '2000-01-01', '{\"k1\": \"aaa\"}') (2, '2000-01-01', '{\"k2\": \"bbb\"}');" ${CLICKHOUSE_CLIENT} -q "OPTIMIZE TABLE t_json_empty_parts FINAL;" ${CLICKHOUSE_CLIENT} -q "SELECT count() FROM t_json_empty_parts;" diff --git a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh index 7136698d5b7..b23c4f376fc 100755 --- a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh +++ b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh @@ -39,7 +39,7 @@ function check_span() extra_condition="" fi - ret=$(${CLICKHOUSE_CLIENT} -nq " + ret=$(${CLICKHOUSE_CLIENT} -q " SYSTEM FLUSH LOGS; SELECT count() diff --git a/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh b/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh index 0cd520d8d5d..96692ba325a 100755 --- a/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh +++ b/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh @@ -5,20 +5,24 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -# Test that running distributed query and cancel it ASAP, -# this can trigger a hung/deadlock in ProcessorList. -for i in {1..50}; do +# Test that runs a distributed query and cancels it ASAP, +# this has a chance to trigger a hung/deadlock in ProcessorList. +for i in {1..50} +do query_id="$CLICKHOUSE_TEST_UNIQUE_NAME-$i" - $CLICKHOUSE_CLIENT --format Null --query_id "$query_id" -q "select * from remote('127.{1|2|3|4|5|6}', numbers(1e12))" 2>/dev/null & - while :; do + $CLICKHOUSE_CLIENT --format Null --query_id "$query_id" --max_rows_to_read 0 --max_bytes_to_read 0 --max_result_rows 0 --max_result_bytes 0 -q "select * from remote('127.{1|2|3|4|5|6}', numbers(1e12))" 2>/dev/null & + while true + do killed_queries="$($CLICKHOUSE_CLIENT -q "kill query where query_id = '$query_id' sync" | wc -l)" - if [[ "$killed_queries" -ge 1 ]]; then + if [[ "$killed_queries" -ge 1 ]] + then break fi done wait -n query_return_status=$? - if [[ $query_return_status -eq 0 ]]; then + if [[ $query_return_status -eq 0 ]] + then echo "Query $query_id should be cancelled, however it returns successfully" fi done diff --git a/tests/queries/0_stateless/02458_insert_select_progress_tcp.sh b/tests/queries/0_stateless/02458_insert_select_progress_tcp.sh index ae3ea017fbb..178da822d41 100755 --- a/tests/queries/0_stateless/02458_insert_select_progress_tcp.sh +++ b/tests/queries/0_stateless/02458_insert_select_progress_tcp.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " drop table if exists insert_select_progress_tcp; create table insert_select_progress_tcp(s UInt16) engine = MergeTree order by s; " diff --git a/tests/queries/0_stateless/02476_analyzer_identifier_hints.sh b/tests/queries/0_stateless/02476_analyzer_identifier_hints.sh index 4c850a6ec9e..92f519a9f8a 100755 --- a/tests/queries/0_stateless/02476_analyzer_identifier_hints.sh +++ b/tests/queries/0_stateless/02476_analyzer_identifier_hints.sh @@ -6,7 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " DROP TABLE IF EXISTS test_table; CREATE TABLE test_table ( @@ -74,7 +74,7 @@ $CLICKHOUSE_CLIENT -q "SELECT 1 AS constant_value, arrayMap(lambda_argument -> l $CLICKHOUSE_CLIENT -q "WITH 1 AS constant_value SELECT (SELECT constant_valu) SETTINGS enable_analyzer = 1;" 2>&1 \ | grep "Maybe you meant: \['constant_value'\]" &>/dev/null; -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " DROP TABLE IF EXISTS test_table_compound; CREATE TABLE test_table_compound ( @@ -142,7 +142,7 @@ $CLICKHOUSE_CLIENT -q "SELECT cast(tuple(1), 'Tuple(value_1 String)') AS constan $CLICKHOUSE_CLIENT -q "WITH cast(tuple(1), 'Tuple(value_1 String)') AS constant_value SELECT (SELECT constant_value.value_) SETTINGS enable_analyzer = 1;" 2>&1 \ | grep "Maybe you meant: \['constant_value.value_1'\]" &>/dev/null; -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " DROP TABLE IF EXISTS test_table_1; CREATE TABLE test_table_1 ( @@ -185,7 +185,7 @@ $CLICKHOUSE_CLIENT -q "SELECT ((1))::Tuple(a Tuple(b UInt32)) AS t, t.a.c SETTIN $CLICKHOUSE_CLIENT -q "SELECT 1"; -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " DROP TABLE test_table; DROP TABLE test_table_compound; DROP TABLE test_table_1; diff --git a/tests/queries/0_stateless/02480_tets_show_full.sh b/tests/queries/0_stateless/02480_tets_show_full.sh index 5f5040ba128..50184857a1f 100755 --- a/tests/queries/0_stateless/02480_tets_show_full.sh +++ b/tests/queries/0_stateless/02480_tets_show_full.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) database=$($CLICKHOUSE_CLIENT -q 'SELECT currentDatabase()') -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " DROP TABLE IF EXISTS test_02480_table; DROP VIEW IF EXISTS test_02480_view; CREATE TABLE test_02480_table (id Int64) ENGINE=MergeTree ORDER BY id; diff --git a/tests/queries/0_stateless/02481_parquet_list_monotonically_increasing_offsets.sh b/tests/queries/0_stateless/02481_parquet_list_monotonically_increasing_offsets.sh index 6fd6da69b70..1027f18fc83 100755 --- a/tests/queries/0_stateless/02481_parquet_list_monotonically_increasing_offsets.sh +++ b/tests/queries/0_stateless/02481_parquet_list_monotonically_increasing_offsets.sh @@ -12,7 +12,7 @@ echo "Parquet" DATA_FILE=$CUR_DIR/data_parquet/list_monotonically_increasing_offsets.parquet ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS parquet_load" ${CLICKHOUSE_CLIENT} --query="CREATE TABLE parquet_load (list Array(Int64), json Nullable(String)) ENGINE = Memory" -cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO parquet_load FORMAT Parquet" -${CLICKHOUSE_CLIENT} --query="SELECT * FROM parquet_load" | md5sum +cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} --max_memory_usage 10G -q "INSERT INTO parquet_load FORMAT Parquet" +${CLICKHOUSE_CLIENT} --max_result_rows 0 --max_result_bytes 0 --query="SELECT * FROM parquet_load" | md5sum ${CLICKHOUSE_CLIENT} --query="SELECT count() FROM parquet_load" ${CLICKHOUSE_CLIENT} --query="drop table parquet_load" diff --git a/tests/queries/0_stateless/02482_json_nested_arrays_with_same_keys.sh b/tests/queries/0_stateless/02482_json_nested_arrays_with_same_keys.sh index 0d0caa78ea3..e0648f4df6e 100755 --- a/tests/queries/0_stateless/02482_json_nested_arrays_with_same_keys.sh +++ b/tests/queries/0_stateless/02482_json_nested_arrays_with_same_keys.sh @@ -21,7 +21,7 @@ echo ' } }' > 02482_object_data.jsonl -$CLICKHOUSE_LOCAL --allow_experimental_object_type=1 -q "select * from file(02482_object_data.jsonl, auto, 'obj JSON')" +$CLICKHOUSE_LOCAL --allow_experimental_object_type=1 -q "select * from file(02482_object_data.jsonl, auto, 'obj Object(''json'')')" rm 02482_object_data.jsonl diff --git a/tests/queries/0_stateless/02482_load_parts_refcounts.sh b/tests/queries/0_stateless/02482_load_parts_refcounts.sh index 5303824d97c..4dc7a7fd99b 100755 --- a/tests/queries/0_stateless/02482_load_parts_refcounts.sh +++ b/tests/queries/0_stateless/02482_load_parts_refcounts.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -n --query " +$CLICKHOUSE_CLIENT --query " DROP TABLE IF EXISTS load_parts_refcounts SYNC; CREATE TABLE load_parts_refcounts (id UInt32) diff --git a/tests/queries/0_stateless/02482_new_json_nested_arrays_with_same_keys.reference b/tests/queries/0_stateless/02482_new_json_nested_arrays_with_same_keys.reference new file mode 100644 index 00000000000..3eb1f72bfd6 --- /dev/null +++ b/tests/queries/0_stateless/02482_new_json_nested_arrays_with_same_keys.reference @@ -0,0 +1 @@ +{"list":[{"nested":{"x":[{"r":"1"},{"r":"2"}]},"x":[{"r":"1"}]}]} diff --git a/tests/queries/0_stateless/02482_new_json_nested_arrays_with_same_keys.sh b/tests/queries/0_stateless/02482_new_json_nested_arrays_with_same_keys.sh new file mode 100755 index 00000000000..ae98946ad73 --- /dev/null +++ b/tests/queries/0_stateless/02482_new_json_nested_arrays_with_same_keys.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +echo ' +{ + "obj" : + { + "list" : + [ + { + "nested" : { + "x" : [{"r" : 1}, {"r" : 2}] + }, + "x" : [{"r" : 1}] + } + ] + } +}' > 02482_object_data.jsonl + +$CLICKHOUSE_LOCAL --allow_experimental_json_type=1 -q "select * from file(02482_object_data.jsonl, auto, 'obj JSON')" + +rm 02482_object_data.jsonl + diff --git a/tests/queries/0_stateless/02490_benchmark_max_consecutive_errors.sh b/tests/queries/0_stateless/02490_benchmark_max_consecutive_errors.sh index f747b3156a5..df7e9386662 100755 --- a/tests/queries/0_stateless/02490_benchmark_max_consecutive_errors.sh +++ b/tests/queries/0_stateless/02490_benchmark_max_consecutive_errors.sh @@ -11,5 +11,6 @@ if [ "$RES" -eq 10 ] then echo "$RES" else + echo "$RES" cat "${CLICKHOUSE_TMP}/${CLICKHOUSE_DATABASE}.log" fi diff --git a/tests/queries/0_stateless/02494_query_cache_drop_cache.reference b/tests/queries/0_stateless/02494_query_cache_drop_cache.reference index 2f1465d1598..6481b5e0770 100644 --- a/tests/queries/0_stateless/02494_query_cache_drop_cache.reference +++ b/tests/queries/0_stateless/02494_query_cache_drop_cache.reference @@ -1,3 +1,17 @@ +Cache query result in query cache 1 1 +DROP entries with a certain tag, no entry will match +1 +After a full DROP, the cache is empty now +0 +Cache query result with different or no tag in query cache +1 +1 +1 +2 +4 +DROP entries with certain tags +2 +1 0 diff --git a/tests/queries/0_stateless/02494_query_cache_drop_cache.sql b/tests/queries/0_stateless/02494_query_cache_drop_cache.sql index bc2e7f442fc..3d064169a4e 100644 --- a/tests/queries/0_stateless/02494_query_cache_drop_cache.sql +++ b/tests/queries/0_stateless/02494_query_cache_drop_cache.sql @@ -4,10 +4,31 @@ -- (it's silly to use what will be tested below but we have to assume other tests cluttered the query cache) SYSTEM DROP QUERY CACHE; --- Cache query result in query cache +SELECT 'Cache query result in query cache'; SELECT 1 SETTINGS use_query_cache = true; SELECT count(*) FROM system.query_cache; --- No query results are cached after DROP +SELECT 'DROP entries with a certain tag, no entry will match'; +SYSTEM DROP QUERY CACHE TAG 'tag'; +SELECT count(*) FROM system.query_cache; + +SELECT 'After a full DROP, the cache is empty now'; SYSTEM DROP QUERY CACHE; SELECT count(*) FROM system.query_cache; + +-- More tests for DROP with tags: + +SELECT 'Cache query result with different or no tag in query cache'; +SELECT 1 SETTINGS use_query_cache = true; +SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'abc'; +SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'def'; +SELECT 2 SETTINGS use_query_cache = true; +SELECT count(*) FROM system.query_cache; + +SELECT 'DROP entries with certain tags'; +SYSTEM DROP QUERY CACHE TAG ''; +SELECT count(*) FROM system.query_cache; +SYSTEM DROP QUERY CACHE TAG 'def'; +SELECT count(*) FROM system.query_cache; +SYSTEM DROP QUERY CACHE TAG 'abc'; +SELECT count(*) FROM system.query_cache; diff --git a/tests/queries/0_stateless/02494_query_cache_tag.reference b/tests/queries/0_stateless/02494_query_cache_tag.reference new file mode 100644 index 00000000000..f7be5c06ecf --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_tag.reference @@ -0,0 +1,12 @@ +1 +SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = \'abc\' abc +--- +1 +1 +SELECT 1 SETTINGS use_query_cache = true +SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = \'abc\' abc +--- +1 +1 +SELECT 1 SETTINGS use_query_cache = true abc +SELECT 1 SETTINGS use_query_cache = true def diff --git a/tests/queries/0_stateless/02494_query_cache_tag.sql b/tests/queries/0_stateless/02494_query_cache_tag.sql new file mode 100644 index 00000000000..62d36f6ebe6 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_tag.sql @@ -0,0 +1,34 @@ +-- Tags: no-parallel +-- Tag no-parallel: Messes with internal cache + +SYSTEM DROP QUERY CACHE; + +-- Store the result a single query with a tag in the query cache and check that the system table knows about the tag +SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'abc'; + +SELECT query, tag FROM system.query_cache; + +SELECT '---'; + +SYSTEM DROP QUERY CACHE; + +-- Store the result of the same query with two different tags. The cache should store two entries. +SELECT 1 SETTINGS use_query_cache = true; -- default query_cache_tag = '' +SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'abc'; +SELECT query, tag FROM system.query_cache ORDER BY ALL; + +SELECT '---'; + +SYSTEM DROP QUERY CACHE; + +-- Like before but the tag is set standalone. + +SET query_cache_tag = 'abc'; +SELECT 1 SETTINGS use_query_cache = true; + +SET query_cache_tag = 'def'; +SELECT 1 SETTINGS use_query_cache = true; + +SELECT query, tag FROM system.query_cache ORDER BY ALL; + +SYSTEM DROP QUERY CACHE; diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting.reference b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference index 77ef213b36d..4d004f2f78f 100644 --- a/tests/queries/0_stateless/02496_remove_redundant_sorting.reference +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference @@ -465,6 +465,37 @@ Expression ((Projection + Before ORDER BY)) ReadFromStorage (SystemOne) -- execute Float64 9007199254740994 +-- presence of an inner OFFSET retains the ORDER BY +-- query +WITH + t1 AS ( + SELECT a, b + FROM + VALUES ( + 'b UInt32, a Int32', + (1, 1), + (2, 0) + ) + ) +SELECT + SUM(a) +FROM ( + SELECT a, b + FROM t1 + ORDER BY 1 DESC, 2 + OFFSET 1 +) t2 +-- explain +Expression ((Projection + Before ORDER BY)) + Aggregating + Expression (Before GROUP BY) + Offset + Expression (Projection) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + Before ORDER BY))) + ReadFromStorage (Values) +-- execute +0 -- disable common optimization to avoid functions to be lifted up (liftUpFunctions optimization), needed for testing with stateful function -- neighbor() as stateful function prevents removing inner ORDER BY since its result depends on order -- query diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting.sh b/tests/queries/0_stateless/02496_remove_redundant_sorting.sh index 646e2501a99..6e132c55628 100755 --- a/tests/queries/0_stateless/02496_remove_redundant_sorting.sh +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting.sh @@ -26,15 +26,15 @@ FROM ORDER BY number DESC ) ORDER BY number ASC" -$CLICKHOUSE_CLIENT -nq "$DISABLE_OPTIMIZATION;EXPLAIN $query" +$CLICKHOUSE_CLIENT -q "$DISABLE_OPTIMIZATION;EXPLAIN $query" function run_query { echo "-- query" echo "$1" echo "-- explain" - $CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;EXPLAIN $1" + $CLICKHOUSE_CLIENT -q "$ENABLE_OPTIMIZATION;EXPLAIN $1" echo "-- execute" - $CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;$1" + $CLICKHOUSE_CLIENT -q "$ENABLE_OPTIMIZATION;$1" } echo "-- Enabled query_plan_remove_redundant_sorting" @@ -302,6 +302,27 @@ FROM )" run_query "$query" +echo "-- presence of an inner OFFSET retains the ORDER BY" +query="WITH + t1 AS ( + SELECT a, b + FROM + VALUES ( + 'b UInt32, a Int32', + (1, 1), + (2, 0) + ) + ) +SELECT + SUM(a) +FROM ( + SELECT a, b + FROM t1 + ORDER BY 1 DESC, 2 + OFFSET 1 +) t2" +run_query "$query" + echo "-- disable common optimization to avoid functions to be lifted up (liftUpFunctions optimization), needed for testing with stateful function" ENABLE_OPTIMIZATION="SET query_plan_enable_optimizations=0;$ENABLE_OPTIMIZATION" echo "-- neighbor() as stateful function prevents removing inner ORDER BY since its result depends on order" diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference b/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference index b6a2e3182df..dd5ac7bf706 100644 --- a/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference @@ -464,6 +464,36 @@ Expression ((Project names + Projection)) ReadFromStorage (SystemOne) -- execute Float64 9007199254740994 +-- presence of an inner OFFSET retains the ORDER BY +-- query +WITH + t1 AS ( + SELECT a, b + FROM + VALUES ( + 'b UInt32, a Int32', + (1, 1), + (2, 0) + ) + ) +SELECT + SUM(a) +FROM ( + SELECT a, b + FROM t1 + ORDER BY 1 DESC, 2 + OFFSET 1 +) t2 +-- explain +Expression ((Project names + Projection)) + Aggregating + Expression ((Before GROUP BY + (Change column names to column identifiers + Project names))) + Offset + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers)))))) + ReadFromStorage (Values) +-- execute +0 -- disable common optimization to avoid functions to be lifted up (liftUpFunctions optimization), needed for testing with stateful function -- neighbor() as stateful function prevents removing inner ORDER BY since its result depends on order -- query diff --git a/tests/queries/0_stateless/02497_remote_disk_fat_column.sql b/tests/queries/0_stateless/02497_remote_disk_fat_column.sql index d97109b66f3..65519296602 100644 --- a/tests/queries/0_stateless/02497_remote_disk_fat_column.sql +++ b/tests/queries/0_stateless/02497_remote_disk_fat_column.sql @@ -2,7 +2,7 @@ set allow_suspicious_fixed_string_types=1; create table fat_granularity (x UInt32, fat FixedString(160000)) engine = MergeTree order by x settings storage_policy = 's3_cache'; -insert into fat_granularity select number, toString(number) || '_' from numbers(100000) settings max_block_size = 8192, max_insert_threads=8; +insert into fat_granularity select number, toString(number) || '_' from numbers(100000) settings max_block_size = 3000, max_insert_threads = 8, min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0; -- Too large sizes of FixedString to deserialize select x from fat_granularity prewhere fat like '256\_%' settings max_threads=2; diff --git a/tests/queries/0_stateless/02497_storage_file_reader_selection.sh b/tests/queries/0_stateless/02497_storage_file_reader_selection.sh index aa43e81f131..27243dd47fa 100755 --- a/tests/queries/0_stateless/02497_storage_file_reader_selection.sh +++ b/tests/queries/0_stateless/02497_storage_file_reader_selection.sh @@ -13,6 +13,6 @@ $CLICKHOUSE_LOCAL --storage_file_read_method=mmap --print-profile-events -q "SEL $CLICKHOUSE_LOCAL --storage_file_read_method=pread --print-profile-events -q "SELECT * FROM file($DATA_FILE) FORMAT Null" 2>&1 | grep -F -q "CreatedReadBufferMMap" && echo 'Fail' || echo 0 $CLICKHOUSE_LOCAL --storage_file_read_method=pread --print-profile-events -q "SELECT * FROM file($DATA_FILE) FORMAT Null" 2>&1 | grep -F -q "CreatedReadBufferOrdinary" && echo 1 || echo 'Fail' -$CLICKHOUSE_CLIENT --storage_file_read_method=mmap -nq "SELECT * FROM file('/dev/null', 'LineAsString') FORMAT Null -- { serverError BAD_ARGUMENTS }" +$CLICKHOUSE_CLIENT --storage_file_read_method=mmap -q "SELECT * FROM file('/dev/null', 'LineAsString') FORMAT Null -- { serverError BAD_ARGUMENTS }" rm $DATA_FILE diff --git a/tests/queries/0_stateless/02500_remove_redundant_distinct.sh b/tests/queries/0_stateless/02500_remove_redundant_distinct.sh index 3c06119e8d2..6fd42fa940a 100755 --- a/tests/queries/0_stateless/02500_remove_redundant_distinct.sh +++ b/tests/queries/0_stateless/02500_remove_redundant_distinct.sh @@ -24,15 +24,15 @@ FROM ) )" -$CLICKHOUSE_CLIENT -nq "$DISABLE_OPTIMIZATION;EXPLAIN $query" +$CLICKHOUSE_CLIENT -q "$DISABLE_OPTIMIZATION;EXPLAIN $query" function run_query { echo "-- query" echo "$1" echo "-- explain" - $CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;EXPLAIN $1" + $CLICKHOUSE_CLIENT -q "$ENABLE_OPTIMIZATION;EXPLAIN $1" echo "-- execute" - $CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;$1" + $CLICKHOUSE_CLIENT -q "$ENABLE_OPTIMIZATION;$1" } echo "-- Enabled $OPTIMIZATION_SETTING" diff --git a/tests/queries/0_stateless/02513_validate_data_types.sql b/tests/queries/0_stateless/02513_validate_data_types.sql index 5eb91ac7879..4996f63c5bd 100644 --- a/tests/queries/0_stateless/02513_validate_data_types.sql +++ b/tests/queries/0_stateless/02513_validate_data_types.sql @@ -1,9 +1,9 @@ -- Tags: no-fasttest set allow_experimental_object_type=0; -select CAST('{"x" : 1}', 'JSON'); -- {serverError ILLEGAL_COLUMN} +select CAST('{"x" : 1}', 'Object(''json'')'); -- {serverError ILLEGAL_COLUMN} desc file(nonexist.json, JSONAsObject); -- {serverError ILLEGAL_COLUMN} -desc file(nonexist.json, JSONEachRow, 'x JSON'); -- {serverError ILLEGAL_COLUMN} +desc file(nonexist.json, JSONEachRow, 'x Object(''json'')'); -- {serverError ILLEGAL_COLUMN} set allow_suspicious_low_cardinality_types=0; select CAST(1000000, 'LowCardinality(UInt64)'); -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} diff --git a/tests/queries/0_stateless/02514_database_replicated_no_arguments_for_rmt.sh b/tests/queries/0_stateless/02514_database_replicated_no_arguments_for_rmt.sh index a0f228e6af4..c1aa24943c1 100755 --- a/tests/queries/0_stateless/02514_database_replicated_no_arguments_for_rmt.sh +++ b/tests/queries/0_stateless/02514_database_replicated_no_arguments_for_rmt.sh @@ -14,8 +14,8 @@ ${CLICKHOUSE_CLIENT} -q "CREATE USER user_${CLICKHOUSE_DATABASE} settings databa ${CLICKHOUSE_CLIENT} -q "GRANT CREATE TABLE ON ${CLICKHOUSE_DATABASE}_db.* TO user_${CLICKHOUSE_DATABASE}" ${CLICKHOUSE_CLIENT} -q "GRANT TABLE ENGINE ON ReplicatedMergeTree TO user_${CLICKHOUSE_DATABASE}" ${CLICKHOUSE_CLIENT} -q "CREATE DATABASE ${CLICKHOUSE_DATABASE}_db engine = Replicated('/clickhouse/databases/${CLICKHOUSE_TEST_ZOOKEEPER_PREFIX}/${CLICKHOUSE_DATABASE}_db', '{shard}', '{replica}')" -${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --user "user_${CLICKHOUSE_DATABASE}" -n --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_rmt_ok (x UInt32) engine = ReplicatedMergeTree order by x;" -${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --user "user_${CLICKHOUSE_DATABASE}" -n --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_rmt_fail (x UInt32) engine = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/root/{shard}', '{replica}') order by x; -- { serverError 80 }" +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --user "user_${CLICKHOUSE_DATABASE}" --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_rmt_ok (x UInt32) engine = ReplicatedMergeTree order by x;" +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --user "user_${CLICKHOUSE_DATABASE}" --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_rmt_fail (x UInt32) engine = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/root/{shard}', '{replica}') order by x; -- { serverError 80 }" ${CLICKHOUSE_CLIENT} --query "DROP DATABASE ${CLICKHOUSE_DATABASE}_db" ${CLICKHOUSE_CLIENT} -q "DROP USER user_${CLICKHOUSE_DATABASE}" diff --git a/tests/queries/0_stateless/02514_if_with_lazy_low_cardinality.sql b/tests/queries/0_stateless/02514_if_with_lazy_low_cardinality.sql index 80e3c0a9ece..b169cfd0ab9 100644 --- a/tests/queries/0_stateless/02514_if_with_lazy_low_cardinality.sql +++ b/tests/queries/0_stateless/02514_if_with_lazy_low_cardinality.sql @@ -1,3 +1,6 @@ +-- Tags: no-fasttest +-- no-fasttest: upper/lowerUTF8 use ICU + create table if not exists t (`arr.key` Array(LowCardinality(String)), `arr.value` Array(LowCardinality(String))) engine = Memory; insert into t (`arr.key`, `arr.value`) values (['a'], ['b']); select if(true, if(lowerUTF8(arr.key) = 'a', 1, 2), 3) as x from t left array join arr; diff --git a/tests/queries/0_stateless/02532_send_logs_level_test.reference b/tests/queries/0_stateless/02532_send_logs_level_test.reference index 72f4ea06184..e69de29bb2d 100644 --- a/tests/queries/0_stateless/02532_send_logs_level_test.reference +++ b/tests/queries/0_stateless/02532_send_logs_level_test.reference @@ -1,4 +0,0 @@ - MergeTreeReadPoolBase: Will use min_marks_per_task=24 - MergeTreeMarksLoader: Loading marks from path data.cmrk3 - MergeTreeRangeReader: First reader returned: num_rows: 1, columns: 1, total_rows_per_granule: 1, no filter, column[0]: Int32(size = 1), requested columns: key - MergeTreeRangeReader: read() returned num_rows: 1, columns: 1, total_rows_per_granule: 1, no filter, column[0]: Int32(size = 1), sample block key diff --git a/tests/queries/0_stateless/02532_send_logs_level_test.sh b/tests/queries/0_stateless/02532_send_logs_level_test.sh index 506ac2331f2..a91e49ddd22 100755 --- a/tests/queries/0_stateless/02532_send_logs_level_test.sh +++ b/tests/queries/0_stateless/02532_send_logs_level_test.sh @@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " drop table if exists data; create table data (key Int) engine=MergeTree order by tuple() settings min_bytes_for_wide_part = '1G', compress_marks = 1; insert into data values (1); @@ -18,6 +18,10 @@ $CLICKHOUSE_CLIENT -nm -q " # instead of "last" value, hence you cannot simply append another # --send_logs_level here. CLICKHOUSE_CLIENT_CLEAN=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=test/g') -$CLICKHOUSE_CLIENT_CLEAN -q "select * from data SETTINGS merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;" |& grep -o -e '.*' -e '.*' -$CLICKHOUSE_CLIENT -q "drop table data" +set -e + +trap '$CLICKHOUSE_CLIENT -q "drop table data"' EXIT + +$CLICKHOUSE_CLIENT_CLEAN -q "select * from data SETTINGS merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;" |& (! grep -q -o -e '.*') +$CLICKHOUSE_CLIENT_CLEAN -q "select * from data SETTINGS merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;" |& grep -q -o -e '.*' diff --git a/tests/queries/0_stateless/02534_keyed_siphash.reference b/tests/queries/0_stateless/02534_keyed_siphash.reference index 3f478218ff1..a05446a494e 100644 --- a/tests/queries/0_stateless/02534_keyed_siphash.reference +++ b/tests/queries/0_stateless/02534_keyed_siphash.reference @@ -236,6 +236,18 @@ Check asan bug 0 Check bug found fuzzing 9042C6691B1A75F0EA3314B6F55728BB -Check bug 2 found fuzzing +Test arrays and maps 608E1FF030C9E206185B112C2A25F1A7 ABB65AE97711A2E053E324ED88B1D08B +Test empty arrays and maps +4761183170873013810 +0AD04BFD000000000000000000000000 +4761183170873013810 +0AD04BFD000000000000000000000000 +Test maps with arrays as keys +16734549324845627102 +1D03941D808D04810D2363A6C107D622 +16734549324845627102 +16734549324845627102 +1D03941D808D04810D2363A6C107D622 +1D03941D808D04810D2363A6C107D622 diff --git a/tests/queries/0_stateless/02534_keyed_siphash.sql b/tests/queries/0_stateless/02534_keyed_siphash.sql index fb707109c83..7cfc82512bd 100644 --- a/tests/queries/0_stateless/02534_keyed_siphash.sql +++ b/tests/queries/0_stateless/02534_keyed_siphash.sql @@ -263,10 +263,10 @@ select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62)); select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63)); -select sipHash64Keyed((0, 0), '1'); -- { serverError NOT_IMPLEMENTED } -select sipHash128Keyed((0, 0), '1'); -- { serverError NOT_IMPLEMENTED } -select sipHash64Keyed(toUInt64(0), '1'); -- { serverError NOT_IMPLEMENTED } -select sipHash128Keyed(toUInt64(0), '1'); -- { serverError NOT_IMPLEMENTED } +select sipHash64Keyed((0, 0), '1'); -- { serverError BAD_ARGUMENTS } +select sipHash128Keyed((0, 0), '1'); -- { serverError BAD_ARGUMENTS } +select sipHash64Keyed(toUInt64(0), '1'); -- { serverError BAD_ARGUMENTS } +select sipHash128Keyed(toUInt64(0), '1'); -- { serverError BAD_ARGUMENTS } select hex(sipHash64()); SELECT hex(sipHash128()); @@ -339,9 +339,20 @@ SELECT 'Check bug found fuzzing'; SELECT [(255, 1048575)], sipHash128ReferenceKeyed((toUInt64(2147483646), toUInt64(9223372036854775807)), ([(NULL, 100), (NULL, NULL), (1024, 10)], toUInt64(2), toUInt64(1024)), ''), hex(sipHash128ReferenceKeyed((-9223372036854775807, 1.), '-1', NULL)), ('', toUInt64(65535), [(9223372036854775807, 9223372036854775806)], toUInt64(65536)), arrayJoin((NULL, 65537, 255), [(NULL, NULL)]) GROUP BY tupleElement((NULL, NULL, NULL, -1), toUInt64(2), 2) = NULL; -- { serverError NOT_IMPLEMENTED } SELECT hex(sipHash128ReferenceKeyed((0::UInt64, 0::UInt64), ([1, 1]))); -SELECT 'Check bug 2 found fuzzing'; +SELECT 'Test arrays and maps'; DROP TABLE IF EXISTS sipHashKeyed_keys; CREATE TABLE sipHashKeyed_keys (`a` Map(String, String)) ENGINE = Memory; INSERT INTO sipHashKeyed_keys FORMAT VALUES ({'a':'b', 'c':'d'}), ({'e':'f', 'g':'h'}); SELECT hex(sipHash128ReferenceKeyed((0::UInt64, materialize(0::UInt64)), a)) FROM sipHashKeyed_keys ORDER BY a; DROP TABLE sipHashKeyed_keys; + +SELECT 'Test empty arrays and maps'; +SELECT sipHash64Keyed((1::UInt64, 2::UInt64), []); +SELECT hex(sipHash128Keyed((1::UInt64, 2::UInt64), [])); +SELECT sipHash64Keyed((1::UInt64, 2::UInt64), mapFromArrays([], [])); +SELECT hex(sipHash128Keyed((1::UInt64, 2::UInt64), mapFromArrays([], []))); +SELECT 'Test maps with arrays as keys'; +SELECT sipHash64Keyed((1::UInt64, 2::UInt64), map([0], 1, [2], 3)); +SELECT hex(sipHash128Keyed((1::UInt64, 2::UInt64), map([0], 1, [2], 3))); +SELECT sipHash64Keyed((materialize(1::UInt64), 2::UInt64), map([0], 1, [2], 3)) FROM numbers(2); +SELECT hex(sipHash128Keyed((materialize(1::UInt64), 2::UInt64), map([0], 1, [2], 3))) FROM numbers(2); diff --git a/tests/queries/0_stateless/02536_delta_gorilla_corruption.sql b/tests/queries/0_stateless/02536_delta_gorilla_corruption.sql index a4e0965e329..3accc726d08 100644 --- a/tests/queries/0_stateless/02536_delta_gorilla_corruption.sql +++ b/tests/queries/0_stateless/02536_delta_gorilla_corruption.sql @@ -12,7 +12,7 @@ create table bug_delta_gorilla (value_bug UInt64 codec (Delta, Gorilla)) engine = MergeTree order by tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi' -as (select 0 from numbers(30000000)); +as (select 0 from numbers(20000000)); select count(*) from bug_delta_gorilla diff --git a/tests/queries/0_stateless/02539_settings_alias.sh b/tests/queries/0_stateless/02539_settings_alias.sh index 5cfa09ad0fa..c770633c0ac 100755 --- a/tests/queries/0_stateless/02539_settings_alias.sh +++ b/tests/queries/0_stateless/02539_settings_alias.sh @@ -10,7 +10,7 @@ for check_query in "SELECT value FROM system.settings WHERE name = 'alter_sync'; echo "Checking setting value with '$check_query'" echo 'Using SET' - $CLICKHOUSE_CLIENT -mn -q """ + $CLICKHOUSE_CLIENT -m -q """ SET replication_alter_partitions_sync = 0; $check_query @@ -28,7 +28,7 @@ for check_query in "SELECT value FROM system.settings WHERE name = 'alter_sync'; done -$CLICKHOUSE_CLIENT -mn -q """ +$CLICKHOUSE_CLIENT -m -q """ DROP VIEW IF EXISTS 02539_settings_alias_view; CREATE VIEW 02539_settings_alias_view AS SELECT 1 SETTINGS replication_alter_partitions_sync = 2; SHOW CREATE TABLE 02539_settings_alias_view; diff --git a/tests/queries/0_stateless/02552_siphash128_reference.sql b/tests/queries/0_stateless/02552_siphash128_reference.sql index f7324ed0ee4..46f292d667d 100644 --- a/tests/queries/0_stateless/02552_siphash128_reference.sql +++ b/tests/queries/0_stateless/02552_siphash128_reference.sql @@ -200,8 +200,8 @@ select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62)); select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63)); -select sipHash128ReferenceKeyed((0, 0), '1'); -- { serverError NOT_IMPLEMENTED } -select sipHash128ReferenceKeyed(toUInt64(0), '1'); -- { serverError NOT_IMPLEMENTED } +select sipHash128ReferenceKeyed((0, 0), '1'); -- { serverError BAD_ARGUMENTS } +select sipHash128ReferenceKeyed(toUInt64(0), '1'); -- { serverError BAD_ARGUMENTS } SELECT hex(sipHash128Reference()) = hex(reverse(unhex('1CE422FEE7BD8DE20000000000000000'))) or hex(sipHash128()) = '1CE422FEE7BD8DE20000000000000000'; SELECT hex(sipHash128ReferenceKeyed()) = hex(reverse(unhex('1CE422FEE7BD8DE20000000000000000'))) or hex(sipHash128Keyed()) = '1CE422FEE7BD8DE20000000000000000'; diff --git a/tests/queries/0_stateless/02553_new_type_json_attach_partition.reference b/tests/queries/0_stateless/02553_new_type_json_attach_partition.reference new file mode 100644 index 00000000000..1556b015503 --- /dev/null +++ b/tests/queries/0_stateless/02553_new_type_json_attach_partition.reference @@ -0,0 +1,2 @@ +{"b":"1","c":{"k1":"1"}} +{"b":"1","c":{"k1":["1","2"]}} diff --git a/tests/queries/0_stateless/02553_new_type_json_attach_partition.sql b/tests/queries/0_stateless/02553_new_type_json_attach_partition.sql new file mode 100644 index 00000000000..c7d4c0b5d55 --- /dev/null +++ b/tests/queries/0_stateless/02553_new_type_json_attach_partition.sql @@ -0,0 +1,15 @@ +SET allow_experimental_json_type = 1; + +DROP TABLE IF EXISTS t_json_attach_partition; + +CREATE TABLE t_json_attach_partition(b UInt64, c JSON) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO t_json_attach_partition FORMAT JSONEachRow {"b": 1, "c" : {"k1": 1}}; + +ALTER TABLE t_json_attach_partition DETACH PARTITION tuple(); +INSERT INTO t_json_attach_partition FORMAT JSONEachRow {"b": 1, "c" : {"k1": [1, 2]}}; + +ALTER TABLE t_json_attach_partition ATTACH PARTITION tuple(); +SELECT * FROM t_json_attach_partition ORDER BY toString(c) FORMAT JSONEachRow; + +DROP TABLE t_json_attach_partition; diff --git a/tests/queries/0_stateless/02553_type_json_attach_partition.sql b/tests/queries/0_stateless/02553_type_json_attach_partition.sql index e77f5885ec3..428189f3a84 100644 --- a/tests/queries/0_stateless/02553_type_json_attach_partition.sql +++ b/tests/queries/0_stateless/02553_type_json_attach_partition.sql @@ -2,7 +2,7 @@ SET allow_experimental_object_type = 1; DROP TABLE IF EXISTS t_json_attach_partition; -CREATE TABLE t_json_attach_partition(b UInt64, c JSON) ENGINE = MergeTree ORDER BY tuple(); +CREATE TABLE t_json_attach_partition(b UInt64, c Object('json')) ENGINE = MergeTree ORDER BY tuple(); INSERT INTO t_json_attach_partition FORMAT JSONEachRow {"b": 1, "c" : {"k1": 1}}; diff --git a/tests/queries/0_stateless/02553_type_object_analyzer.sql b/tests/queries/0_stateless/02553_type_object_analyzer.sql index eb4e49757cf..e5dd6eaebc0 100644 --- a/tests/queries/0_stateless/02553_type_object_analyzer.sql +++ b/tests/queries/0_stateless/02553_type_object_analyzer.sql @@ -3,7 +3,7 @@ SET allow_experimental_object_type = 1; SET enable_analyzer = 1; DROP TABLE IF EXISTS t_json_analyzer; -CREATE TABLE t_json_analyzer (a JSON) ENGINE = Memory; +CREATE TABLE t_json_analyzer (a Object('json')) ENGINE = Memory; INSERT INTO t_json_analyzer VALUES ('{"id": 2, "obj": {"k2": {"k3": "str", "k4": [{"k6": 55}]}, "some": 42}, "s": "bar"}'); SELECT any(a) AS data FROM t_json_analyzer FORMAT JSONEachRow; diff --git a/tests/queries/0_stateless/02555_davengers_rename_chain.sh b/tests/queries/0_stateless/02555_davengers_rename_chain.sh index 660a95846c4..196507dc72e 100755 --- a/tests/queries/0_stateless/02555_davengers_rename_chain.sh +++ b/tests/queries/0_stateless/02555_davengers_rename_chain.sh @@ -46,7 +46,7 @@ tables["wrong_metadata_compact"]="min_bytes_for_wide_part = 10000000" for table in "${!tables[@]}"; do settings="${tables[$table]}" - $CLICKHOUSE_CLIENT -n --query=" + $CLICKHOUSE_CLIENT --query=" DROP TABLE IF EXISTS $table; CREATE TABLE $table( @@ -69,7 +69,7 @@ for table in "${!tables[@]}"; do wait_column "$table" "\`a1\` UInt64" || exit 2 - $CLICKHOUSE_CLIENT -n --query=" + $CLICKHOUSE_CLIENT --query=" -- { echoOn } SELECT 'ECHO_ALIGNMENT_FIX' FORMAT Null; @@ -82,7 +82,7 @@ for table in "${!tables[@]}"; do wait_mutation_loaded "$table" "b1 TO a" || exit 2 - $CLICKHOUSE_CLIENT -n --query=" + $CLICKHOUSE_CLIENT --query=" -- { echoOn } SELECT 'ECHO_ALIGNMENT_FIX' FORMAT Null; @@ -94,7 +94,7 @@ for table in "${!tables[@]}"; do wait_for_all_mutations "$table" - $CLICKHOUSE_CLIENT -n --query=" + $CLICKHOUSE_CLIENT --query=" -- { echoOn } SELECT 'ECHO_ALIGNMENT_FIX' FORMAT Null; diff --git a/tests/queries/0_stateless/02572_query_views_log_background_thread.sh b/tests/queries/0_stateless/02572_query_views_log_background_thread.sh index a3e428e75c8..22b94e09b58 100755 --- a/tests/queries/0_stateless/02572_query_views_log_background_thread.sh +++ b/tests/queries/0_stateless/02572_query_views_log_background_thread.sh @@ -8,18 +8,21 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} --ignore-error --multiquery --query "drop table if exists buffer_02572; +${CLICKHOUSE_CLIENT} --ignore-error --query "drop table if exists buffer_02572; drop table if exists data_02572; drop table if exists copy_02572; drop table if exists mv_02572;" ${CLICKHOUSE_CLIENT} --query="create table copy_02572 (key Int) engine=Memory();" ${CLICKHOUSE_CLIENT} --query="create table data_02572 (key Int) engine=Memory();" -${CLICKHOUSE_CLIENT} --query="create table buffer_02572 (key Int) engine=Buffer(currentDatabase(), data_02572, 1, 3, 3, 1, 1e9, 1, 1e9);" +${CLICKHOUSE_CLIENT} --query="create table buffer_02572 (key Int) engine=Buffer(currentDatabase(), data_02572, 1, 8, 8, 1, 1e9, 1, 1e9);" ${CLICKHOUSE_CLIENT} --query="create materialized view mv_02572 to copy_02572 as select * from data_02572;" +start=$(date +%s) ${CLICKHOUSE_CLIENT} --query="insert into buffer_02572 values (1);" -# ensure that the flush was not direct -${CLICKHOUSE_CLIENT} --ignore-error --multiquery --query "select * from data_02572; select * from copy_02572;" +if [ $(( $(date +%s) - start )) -gt 6 ]; then # clickhouse test cluster is overloaded, will skip + # ensure that the flush was not direct + ${CLICKHOUSE_CLIENT} --ignore-error --query "select * from data_02572; select * from copy_02572;" +fi # we cannot use OPTIMIZE, this will attach query context, so let's wait for _ in {1..100}; do @@ -28,11 +31,11 @@ for _ in {1..100}; do done -${CLICKHOUSE_CLIENT} --ignore-error --multiquery --query "select * from data_02572; select * from copy_02572;" +${CLICKHOUSE_CLIENT} --ignore-error --query "select * from data_02572; select * from copy_02572;" ${CLICKHOUSE_CLIENT} --query="system flush logs;" ${CLICKHOUSE_CLIENT} --query="select count() > 0, lower(status::String), errorCodeToName(exception_code) from system.query_views_log where view_name = concatWithSeparator('.', currentDatabase(), 'mv_02572') and view_target = concatWithSeparator('.', currentDatabase(), 'copy_02572') - group by 2, 3;" \ No newline at end of file + group by 2, 3;" diff --git a/tests/queries/0_stateless/02585_query_status_deadlock.sh b/tests/queries/0_stateless/02585_query_status_deadlock.sh index e3e34109cdb..932cf593393 100755 --- a/tests/queries/0_stateless/02585_query_status_deadlock.sh +++ b/tests/queries/0_stateless/02585_query_status_deadlock.sh @@ -7,8 +7,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) QUERY_ID="${CLICKHOUSE_DATABASE}_test_02585_query_to_kill_id_1" -$CLICKHOUSE_CLIENT --query_id="$QUERY_ID" -n -q " -create temporary table tmp as select * from numbers(500000000); +$CLICKHOUSE_CLIENT --query_id="$QUERY_ID" --max_rows_to_read 0 -n -q " +create temporary table tmp as select * from numbers(100000000); select * from remote('127.0.0.2', 'system.numbers_mt') where number in (select * from tmp);" &> /dev/null & $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" @@ -19,8 +19,7 @@ do if [ -n "$res" ]; then break fi - sleep 1 + sleep 1 done $CLICKHOUSE_CLIENT -q "kill query where query_id = '$QUERY_ID' sync" &> /dev/null - diff --git a/tests/queries/0_stateless/02597_column_update_tricky_expression_and_replication.python b/tests/queries/0_stateless/02597_column_update_tricky_expression_and_replication.python new file mode 100644 index 00000000000..eb0cab9d56f --- /dev/null +++ b/tests/queries/0_stateless/02597_column_update_tricky_expression_and_replication.python @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 + +import os +import sys +from threading import Thread +from queue import Queue + +CURDIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) + +from pure_http_client import ClickHouseClient + + +client = ClickHouseClient() + + +client.query("DROP TABLE IF EXISTS test SYNC") +client.query( + """ +CREATE TABLE test +( + c_id String, + p_id String, + d UInt32, +) +Engine = ReplicatedMergeTree('/clickhouse/tables/{database}/test/test_table', '1') +ORDER BY (c_id, p_id) +""" +) + + +def attempt_mutation(q): + try: + client.query( + "ALTER TABLE test DROP COLUMN x SETTINGS mutations_sync=2", + with_retries=False, + ) + except ValueError as e: + assert "BAD_ARGUMENTS" in str(e) + q.put("OK") + + +client.query("INSERT INTO test SELECT '1', '11', '111' FROM numbers(5)") +client.query("SYSTEM ENABLE FAILPOINT infinite_sleep") +client.query( + "ALTER TABLE test UPDATE d = d + sleepEachRow(0.3) where 1 SETTINGS mutations_sync=0" +) +client.query("ALTER TABLE test ADD COLUMN x UInt32 default 0 SETTINGS mutations_sync=0") +client.query("ALTER TABLE test UPDATE d = x + 1 where 1 SETTINGS mutations_sync=0") + +q = Queue() +t = Thread(target=attempt_mutation, args=(q,)) +t.start() +t.join() +assert not q.empty() +assert q.get() == "OK" + +client.query("SYSTEM DISABLE FAILPOINT infinite_sleep") + +client.query("ALTER TABLE test UPDATE x = x + 1 where 1 SETTINGS mutations_sync=2") +client.query("ALTER TABLE test DROP COLUMN x SETTINGS mutations_sync=2") +client.query("SELECT * from test format Null") +client.query("DROP TABLE test") diff --git a/tests/queries/0_stateless/02597_column_update_tricky_expression_and_replication.reference b/tests/queries/0_stateless/02597_column_update_tricky_expression_and_replication.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02597_column_update_tricky_expression_and_replication.sh b/tests/queries/0_stateless/02597_column_update_tricky_expression_and_replication.sh new file mode 100755 index 00000000000..5be04d99204 --- /dev/null +++ b/tests/queries/0_stateless/02597_column_update_tricky_expression_and_replication.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +# Tags: zookeeper, no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# We should have correct env vars from shell_config.sh to run this test +python3 "$CURDIR"/02597_column_update_tricky_expression_and_replication.python diff --git a/tests/queries/0_stateless/02597_column_update_tricy_expression_and_replication.sql b/tests/queries/0_stateless/02597_column_update_tricy_expression_and_replication.sql deleted file mode 100644 index 34f88b19b7e..00000000000 --- a/tests/queries/0_stateless/02597_column_update_tricy_expression_and_replication.sql +++ /dev/null @@ -1,28 +0,0 @@ -CREATE TABLE test ( - `c_id` String, - `p_id` String, - `d` UInt32 -) -ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/test_table', '1') -ORDER BY (c_id, p_id); - -INSERT INTO test SELECT '1', '11', '111' FROM numbers(3); - -INSERT INTO test SELECT '2', '22', '22' FROM numbers(3); - -set mutations_sync=0; - -ALTER TABLE test UPDATE d = d + sleepEachRow(0.3) where 1; - -ALTER TABLE test ADD COLUMN x UInt32 default 0; -ALTER TABLE test UPDATE d = x + 1 where 1; -ALTER TABLE test DROP COLUMN x SETTINGS mutations_sync = 2; --{serverError BAD_ARGUMENTS} - -ALTER TABLE test UPDATE x = x + 1 where 1 SETTINGS mutations_sync = 2; - -ALTER TABLE test DROP COLUMN x SETTINGS mutations_sync = 2; - -select * from test format Null; - -DROP TABLE test; - diff --git a/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.reference b/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.reference index babcecf7004..448eca3e5b1 100644 --- a/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.reference +++ b/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.reference @@ -1,11 +1,11 @@ INSERT TO S3 - [ 0 ] S3Clients: 1 - [ 0 ] S3CompleteMultipartUpload: 1 - [ 0 ] S3CreateMultipartUpload: 1 - [ 0 ] S3HeadObject: 2 - [ 0 ] S3ReadRequestsCount: 2 - [ 0 ] S3UploadPart: 1 - [ 0 ] S3WriteRequestsCount: 3 +S3Clients 1 +S3CompleteMultipartUpload 1 +S3CreateMultipartUpload 1 +S3HeadObject 2 +S3ReadRequestsCount 2 +S3UploadPart 1 +Successful write requests 3 CHECK WITH query_log QueryFinish S3CreateMultipartUpload 1 S3UploadPart 1 S3CompleteMultipartUpload 1 S3PutObject 0 CREATE diff --git a/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.sh b/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.sh index e346d9893a7..ff534a6a2e6 100755 --- a/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.sh +++ b/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest +# Tags: no-fasttest, no-random-merge-tree-settings # Tag no-fasttest: needs s3 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) @@ -9,7 +9,25 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) echo "INSERT TO S3" $CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=-1 -nq " INSERT INTO TABLE FUNCTION s3('http://localhost:11111/test/profile_events.csv', 'test', 'testtest', 'CSV', 'number UInt64') SELECT number FROM numbers(1000000) SETTINGS s3_max_single_part_upload_size = 10, s3_truncate_on_insert = 1; -" 2>&1 | grep -o -e '\ \[\ .*\ \]\ S3.*:\ .*\ ' | grep -v 'Microseconds' | grep -v 'S3DiskConnections' | grep -v 'S3DiskAddresses' | sort +" 2>&1 | $CLICKHOUSE_LOCAL -q " +WITH '(\\w+): (\\d+)' AS pattern, + (SELECT (groupArray(regexpExtract(line, pattern, 1)), + groupArray(regexpExtract(line, pattern, 2)::UInt64))::Map(String, UInt64) + FROM file(stdin, 'LineAsString', 'line String') + WHERE line LIKE '% S3%' + AND line NOT LIKE '%Microseconds%' + AND line NOT LIKE '%S3DiskConnections%' + AND line NOT LIKE '%S3DiskAddresses%' + AND line NOT LIKE '%RequestThrottlerCount%' + ) AS pe_map +SELECT * FROM ( + SELECT untuple(arrayJoin(pe_map) AS pe) + WHERE tupleElement(pe, 1) not like '%WriteRequests%' + UNION ALL + SELECT 'Successful write requests', + (pe_map['S3WriteRequestsCount'] - pe_map['S3WriteRequestsErrors'])::UInt64 +) ORDER BY 1 +" echo "CHECK WITH query_log" $CLICKHOUSE_CLIENT -nq " @@ -40,19 +58,19 @@ CREATE TABLE times (t DateTime) ENGINE MergeTree ORDER BY t echo "INSERT" $CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=-1 -nq " INSERT INTO times SELECT now() + INTERVAL 1 day SETTINGS optimize_on_insert = 0; -" 2>&1 | grep -o -e '\ \[\ .*\ \]\ FileOpen:\ .*\ ' +" 2>&1 | grep -o -e ' \[ .* \] FileOpen: .* ' echo "READ" $CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=-1 -nq " SELECT '1', min(t) FROM times SETTINGS optimize_use_implicit_projections = 1; -" 2>&1 | grep -o -e '\ \[\ .*\ \]\ FileOpen:\ .*\ ' +" 2>&1 | grep -o -e ' \[ .* \] FileOpen: .* ' echo "INSERT and READ INSERT" $CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=-1 -nq " INSERT INTO times SELECT now() + INTERVAL 2 day SETTINGS optimize_on_insert = 0; SELECT '2', min(t) FROM times SETTINGS optimize_use_implicit_projections = 1; INSERT INTO times SELECT now() + INTERVAL 3 day SETTINGS optimize_on_insert = 0; -" 2>&1 | grep -o -e '\ \[\ .*\ \]\ FileOpen:\ .*\ ' +" 2>&1 | grep -o -e ' \[ .* \] FileOpen: .* ' echo "DROP" $CLICKHOUSE_CLIENT -nq " diff --git a/tests/queries/0_stateless/02697_stop_reading_on_first_cancel.sh b/tests/queries/0_stateless/02697_stop_reading_on_first_cancel.sh index 2be13588453..2de267a79d7 100755 --- a/tests/queries/0_stateless/02697_stop_reading_on_first_cancel.sh +++ b/tests/queries/0_stateless/02697_stop_reading_on_first_cancel.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) QUERY_ID="${CLICKHOUSE_DATABASE}_read_with_cancel" -$CLICKHOUSE_CLIENT -n --query_id="$QUERY_ID" --query="SELECT sum(number * 0) FROM numbers(10000000000) SETTINGS partial_result_on_first_cancel=true;" & +$CLICKHOUSE_CLIENT --max_rows_to_read 0 --query_id="$QUERY_ID" --query="SELECT sum(number * 0) FROM numbers(10000000000) SETTINGS partial_result_on_first_cancel=true;" & pid=$! for _ in {0..60} diff --git a/tests/queries/0_stateless/02700_s3_part_INT_MAX.sh b/tests/queries/0_stateless/02700_s3_part_INT_MAX.sh index a34a480a078..cfb38c60615 100755 --- a/tests/queries/0_stateless/02700_s3_part_INT_MAX.sh +++ b/tests/queries/0_stateless/02700_s3_part_INT_MAX.sh @@ -10,7 +10,9 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # NOTE: .sh test is used over .sql because it needs $CLICKHOUSE_DATABASE to # avoid truncation, since seems that the version of MinIO that is used on CI # too slow with this. -$CLICKHOUSE_CLIENT -nm -q " +# +# Unfortunately, the test has to buffer it in memory. +$CLICKHOUSE_CLIENT --max_memory_usage 16G -nm -q " INSERT INTO FUNCTION s3('http://localhost:11111/test/$CLICKHOUSE_DATABASE/test_INT_MAX.tsv', '', '', 'TSV') SELECT repeat('a', 1024) FROM numbers((pow(2, 30) * 2) / 1024) SETTINGS s3_max_single_part_upload_size = '5Gi'; diff --git a/tests/queries/0_stateless/02703_max_local_read_bandwidth.sh b/tests/queries/0_stateless/02703_max_local_read_bandwidth.sh index 03e0f363d71..79253648475 100755 --- a/tests/queries/0_stateless/02703_max_local_read_bandwidth.sh +++ b/tests/queries/0_stateless/02703_max_local_read_bandwidth.sh @@ -6,7 +6,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " drop table if exists data; create table data (key UInt64 CODEC(NONE)) engine=MergeTree() order by tuple() settings min_bytes_for_wide_part=1e9; " @@ -26,7 +26,7 @@ read_methods=( for read_method in "${read_methods[@]}"; do query_id=$(random_str 10) $CLICKHOUSE_CLIENT --query_id "$query_id" -q "select * from data format Null settings max_local_read_bandwidth='1M', local_filesystem_read_method='$read_method'" - $CLICKHOUSE_CLIENT -nm -q " + $CLICKHOUSE_CLIENT -m -q " SYSTEM FLUSH LOGS; SELECT '$read_method', diff --git a/tests/queries/0_stateless/02703_max_local_write_bandwidth.sh b/tests/queries/0_stateless/02703_max_local_write_bandwidth.sh index 4f6a300c5b3..c5776134673 100755 --- a/tests/queries/0_stateless/02703_max_local_write_bandwidth.sh +++ b/tests/queries/0_stateless/02703_max_local_write_bandwidth.sh @@ -5,7 +5,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " drop table if exists data; create table data (key UInt64 CODEC(NONE)) engine=MergeTree() order by tuple() settings min_bytes_for_wide_part=1e9; " @@ -13,7 +13,7 @@ $CLICKHOUSE_CLIENT -nm -q " query_id=$(random_str 10) # writes 1e6*8 bytes with 1M bandwith it should take (8-1)/1=7 seconds $CLICKHOUSE_CLIENT --query_id "$query_id" -q "insert into data select * from numbers(1e6) settings max_local_write_bandwidth='1M'" -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " SYSTEM FLUSH LOGS; SELECT query_duration_ms >= 7e3, diff --git a/tests/queries/0_stateless/02703_row_policies_for_asterisk.sh b/tests/queries/0_stateless/02703_row_policies_for_asterisk.sh index f9670e5f6f8..bb75ab5041b 100755 --- a/tests/queries/0_stateless/02703_row_policies_for_asterisk.sh +++ b/tests/queries/0_stateless/02703_row_policies_for_asterisk.sh @@ -3,7 +3,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_CLIENT --multiquery " +$CLICKHOUSE_CLIENT " SELECT 'Policy for table \`*\` does not affect other tables in the database'; CREATE ROW POLICY 02703_asterisk_${CLICKHOUSE_DATABASE}_policy ON ${CLICKHOUSE_DATABASE}.\`*\` USING x=1 AS permissive TO ALL; CREATE TABLE ${CLICKHOUSE_DATABASE}.\`*\` (x UInt8, y UInt8) ENGINE = MergeTree ORDER BY x AS SELECT 100, 20; diff --git a/tests/queries/0_stateless/02703_row_policies_for_database_combination.sh b/tests/queries/0_stateless/02703_row_policies_for_database_combination.sh index 35151eed220..756f71fc043 100755 --- a/tests/queries/0_stateless/02703_row_policies_for_database_combination.sh +++ b/tests/queries/0_stateless/02703_row_policies_for_database_combination.sh @@ -3,7 +3,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_CLIENT --multiquery " +$CLICKHOUSE_CLIENT " DROP TABLE IF EXISTS 02703_rptable; DROP TABLE IF EXISTS 02703_rptable_another; diff --git a/tests/queries/0_stateless/02703_row_policy_for_database.sh b/tests/queries/0_stateless/02703_row_policy_for_database.sh index e94bc7acd5e..c29fa313825 100755 --- a/tests/queries/0_stateless/02703_row_policy_for_database.sh +++ b/tests/queries/0_stateless/02703_row_policy_for_database.sh @@ -5,7 +5,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) CLICKHOUSE_USER="user_$CLICKHOUSE_DATABASE" -$CLICKHOUSE_CLIENT --multiquery " +$CLICKHOUSE_CLIENT " DROP USER IF EXISTS ${CLICKHOUSE_USER}; CREATE USER ${CLICKHOUSE_USER}; @@ -28,7 +28,7 @@ DROP POLICY ${CLICKHOUSE_DATABASE}_tb_policy ON ${CLICKHOUSE_DATABASE}.table; $CLICKHOUSE_CLIENT --query "CREATE ROW POLICY any_02703 ON *.some_table USING 1 AS PERMISSIVE TO ALL;" 2>&1 | grep -q "SYNTAX_ERROR" -$CLICKHOUSE_CLIENT --multiquery " +$CLICKHOUSE_CLIENT " CREATE TABLE 02703_rqtable_default (x UInt8) ENGINE = MergeTree ORDER BY x; CREATE ROW POLICY ${CLICKHOUSE_DATABASE}_filter_11_db_policy ON * USING x=1 AS permissive TO ALL; diff --git a/tests/queries/0_stateless/02704_max_backup_bandwidth.sh b/tests/queries/0_stateless/02704_max_backup_bandwidth.sh index 8cb03a93a7a..7e914c4c539 100755 --- a/tests/queries/0_stateless/02704_max_backup_bandwidth.sh +++ b/tests/queries/0_stateless/02704_max_backup_bandwidth.sh @@ -5,7 +5,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " drop table if exists data; create table data (key UInt64 CODEC(NONE)) engine=MergeTree() order by tuple() settings min_bytes_for_wide_part=1e9; " @@ -15,7 +15,7 @@ $CLICKHOUSE_CLIENT -q "insert into data select * from numbers(1e6)" query_id=$(random_str 10) $CLICKHOUSE_CLIENT --query_id "$query_id" -q "backup table data to Disk('backups', '$CLICKHOUSE_DATABASE/data/backup1')" --max_backup_bandwidth=1M > /dev/null -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " SYSTEM FLUSH LOGS; SELECT query_duration_ms >= 7e3, diff --git a/tests/queries/0_stateless/02717_pretty_json.sql b/tests/queries/0_stateless/02717_pretty_json.sql index 8a49eb50adf..1a5c090bcb2 100644 --- a/tests/queries/0_stateless/02717_pretty_json.sql +++ b/tests/queries/0_stateless/02717_pretty_json.sql @@ -1,3 +1,3 @@ set allow_experimental_object_type=1; -select 42 as num, [42, 42] as arr, [[[42, 42], [42, 42]], [[42, 42]]] as nested_arr, tuple(42, 42)::Tuple(a UInt32, b UInt32) as tuple, tuple(tuple(tuple(42, 42), 42), 42)::Tuple(a Tuple(b Tuple(c UInt32, d UInt32), e UInt32), f UInt32) as nested_tuple, map(42, 42, 24, 24) as map, map(42, map(42, map(42, 42))) as nested_map, [tuple(map(42, 42), [42, 42]), tuple(map(42, 42), [42, 42])]::Array(Tuple(Map(UInt32, UInt32), Array(UInt32))) as nested_types, '{"a" : {"b" : 1, "c" : 2}}'::JSON as json_object format PrettyNDJSON; +select 42 as num, [42, 42] as arr, [[[42, 42], [42, 42]], [[42, 42]]] as nested_arr, tuple(42, 42)::Tuple(a UInt32, b UInt32) as tuple, tuple(tuple(tuple(42, 42), 42), 42)::Tuple(a Tuple(b Tuple(c UInt32, d UInt32), e UInt32), f UInt32) as nested_tuple, map(42, 42, 24, 24) as map, map(42, map(42, map(42, 42))) as nested_map, [tuple(map(42, 42), [42, 42]), tuple(map(42, 42), [42, 42])]::Array(Tuple(Map(UInt32, UInt32), Array(UInt32))) as nested_types, '{"a" : {"b" : 1, "c" : 2}}'::Object('json') as json_object format PrettyNDJSON; diff --git a/tests/queries/0_stateless/02724_delay_mutations.sh b/tests/queries/0_stateless/02724_delay_mutations.sh index f349e29253a..7843e692822 100755 --- a/tests/queries/0_stateless/02724_delay_mutations.sh +++ b/tests/queries/0_stateless/02724_delay_mutations.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=./mergetree_mutations.lib . "$CURDIR"/mergetree_mutations.lib -${CLICKHOUSE_CLIENT} -n --query " +${CLICKHOUSE_CLIENT} --query " DROP TABLE IF EXISTS t_delay_mutations SYNC; CREATE TABLE t_delay_mutations (id UInt64, v UInt64) @@ -36,14 +36,14 @@ SELECT count() FROM system.mutations WHERE database = currentDatabase() AND tabl ${CLICKHOUSE_CLIENT} --query "SYSTEM START MERGES t_delay_mutations" wait_for_mutation "t_delay_mutations" "mutation_5.txt" -${CLICKHOUSE_CLIENT} -n --query " +${CLICKHOUSE_CLIENT} --query " SELECT * FROM t_delay_mutations ORDER BY id; SELECT count() FROM system.mutations WHERE database = currentDatabase() AND table = 't_delay_mutations' AND NOT is_done; DROP TABLE IF EXISTS t_delay_mutations SYNC; " -${CLICKHOUSE_CLIENT} -n --query " +${CLICKHOUSE_CLIENT} --query " SYSTEM FLUSH LOGS; SELECT diff --git a/tests/queries/0_stateless/02724_limit_num_mutations.sh b/tests/queries/0_stateless/02724_limit_num_mutations.sh index 60888db0e2e..604cc9ff08e 100755 --- a/tests/queries/0_stateless/02724_limit_num_mutations.sh +++ b/tests/queries/0_stateless/02724_limit_num_mutations.sh @@ -23,7 +23,7 @@ function wait_for_alter() done } -${CLICKHOUSE_CLIENT} -n --query " +${CLICKHOUSE_CLIENT} --query " DROP TABLE IF EXISTS t_limit_mutations SYNC; CREATE TABLE t_limit_mutations (id UInt64, v UInt64) @@ -48,14 +48,14 @@ SELECT count() FROM system.mutations WHERE database = currentDatabase() AND tabl SHOW CREATE TABLE t_limit_mutations; " -${CLICKHOUSE_CLIENT} -n --query " +${CLICKHOUSE_CLIENT} --query " ALTER TABLE t_limit_mutations UPDATE v = 6 WHERE 1 SETTINGS number_of_mutations_to_throw = 100; ALTER TABLE t_limit_mutations MODIFY COLUMN v String SETTINGS number_of_mutations_to_throw = 100, alter_sync = 0; " wait_for_alter "String" -${CLICKHOUSE_CLIENT} -n --query " +${CLICKHOUSE_CLIENT} --query " SELECT * FROM t_limit_mutations ORDER BY id; SELECT count() FROM system.mutations WHERE database = currentDatabase() AND table = 't_limit_mutations' AND NOT is_done; SHOW CREATE TABLE t_limit_mutations; @@ -65,7 +65,7 @@ ${CLICKHOUSE_CLIENT} --query "SYSTEM START MERGES t_limit_mutations" wait_for_mutation "t_limit_mutations" "0000000003" -${CLICKHOUSE_CLIENT} -n --query " +${CLICKHOUSE_CLIENT} --query " SELECT * FROM t_limit_mutations ORDER BY id; SELECT count() FROM system.mutations WHERE database = currentDatabase() AND table = 't_limit_mutations' AND NOT is_done; SHOW CREATE TABLE t_limit_mutations; diff --git a/tests/queries/0_stateless/02725_async_insert_table_setting.sh b/tests/queries/0_stateless/02725_async_insert_table_setting.sh index 13911e8d677..14c2d335275 100755 --- a/tests/queries/0_stateless/02725_async_insert_table_setting.sh +++ b/tests/queries/0_stateless/02725_async_insert_table_setting.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -n --query " +${CLICKHOUSE_CLIENT} --query " DROP TABLE IF EXISTS t_mt_async_insert; DROP TABLE IF EXISTS t_mt_sync_insert; @@ -19,7 +19,7 @@ url="${CLICKHOUSE_URL}&async_insert=0&wait_for_async_insert=1" ${CLICKHOUSE_CURL} -sS "$url" -d "INSERT INTO t_mt_async_insert VALUES (1, 'aa'), (2, 'bb')" ${CLICKHOUSE_CURL} -sS "$url" -d "INSERT INTO t_mt_sync_insert VALUES (1, 'aa'), (2, 'bb')" -${CLICKHOUSE_CLIENT} -n --query " +${CLICKHOUSE_CLIENT} --query " SELECT count() FROM t_mt_async_insert; SELECT count() FROM t_mt_sync_insert; diff --git a/tests/queries/0_stateless/02726_async_insert_flush_queue.sql b/tests/queries/0_stateless/02726_async_insert_flush_queue.sql index 97d644fa4d6..5d941adcb81 100644 --- a/tests/queries/0_stateless/02726_async_insert_flush_queue.sql +++ b/tests/queries/0_stateless/02726_async_insert_flush_queue.sql @@ -30,7 +30,9 @@ SELECT count() FROM t_async_inserts_flush; SYSTEM FLUSH ASYNC INSERT QUEUE; -SELECT count() FROM system.asynchronous_inserts; +SELECT count() FROM system.asynchronous_inserts +WHERE database = currentDatabase() AND table = 't_async_inserts_flush'; + SELECT count() FROM t_async_inserts_flush; DROP TABLE t_async_inserts_flush; diff --git a/tests/queries/0_stateless/02726_async_insert_flush_stress.sh b/tests/queries/0_stateless/02726_async_insert_flush_stress.sh index 876766d0780..61bbbd620f0 100755 --- a/tests/queries/0_stateless/02726_async_insert_flush_stress.sh +++ b/tests/queries/0_stateless/02726_async_insert_flush_stress.sh @@ -91,5 +91,5 @@ flush1 $TIMEOUT & wait ${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH ASYNC INSERT QUEUE" -${CLICKHOUSE_CLIENT} -q "SELECT count() FROM system.asynchronous_inserts" +${CLICKHOUSE_CLIENT} -q "SELECT count() FROM system.asynchronous_inserts WHERE database = currentDatabase() AND table = 'async_inserts'" ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS async_inserts"; diff --git a/tests/queries/0_stateless/02765_queries_with_subqueries_profile_events.sh b/tests/queries/0_stateless/02765_queries_with_subqueries_profile_events.sh index b7d93b5396c..fd64e8d8cb8 100755 --- a/tests/queries/0_stateless/02765_queries_with_subqueries_profile_events.sh +++ b/tests/queries/0_stateless/02765_queries_with_subqueries_profile_events.sh @@ -4,7 +4,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " DROP TABLE IF EXISTS mv; DROP TABLE IF EXISTS output; DROP TABLE IF EXISTS input; @@ -17,7 +17,7 @@ $CLICKHOUSE_CLIENT -n -q " for enable_analyzer in 0 1; do query_id="$(random_str 10)" $CLICKHOUSE_CLIENT --enable_analyzer "$enable_analyzer" --query_id "$query_id" -q "INSERT INTO input SELECT * FROM numbers(1)" - $CLICKHOUSE_CLIENT -mn -q " + $CLICKHOUSE_CLIENT -m -q " SYSTEM FLUSH LOGS; SELECT 1 view, @@ -35,7 +35,7 @@ for enable_analyzer in 0 1; do query_id="$(random_str 10)" $CLICKHOUSE_CLIENT --enable_analyzer "$enable_analyzer" --query_id "$query_id" -q "SELECT * FROM system.one WHERE dummy IN (SELECT * FROM system.one) FORMAT Null" - $CLICKHOUSE_CLIENT -mn -q " + $CLICKHOUSE_CLIENT -m -q " SYSTEM FLUSH LOGS; SELECT 1 subquery, @@ -52,7 +52,7 @@ for enable_analyzer in 0 1; do query_id="$(random_str 10)" $CLICKHOUSE_CLIENT --enable_analyzer "$enable_analyzer" --query_id "$query_id" -q "WITH (SELECT * FROM system.one) AS x SELECT x FORMAT Null" - $CLICKHOUSE_CLIENT -mn -q " + $CLICKHOUSE_CLIENT -m -q " SYSTEM FLUSH LOGS; SELECT 1 CSE, @@ -69,7 +69,7 @@ for enable_analyzer in 0 1; do query_id="$(random_str 10)" $CLICKHOUSE_CLIENT --enable_analyzer "$enable_analyzer" --query_id "$query_id" -q "WITH (SELECT * FROM system.one) AS x SELECT x, x FORMAT Null" - $CLICKHOUSE_CLIENT -mn -q " + $CLICKHOUSE_CLIENT -m -q " SYSTEM FLUSH LOGS; SELECT 1 CSE_Multi, @@ -86,7 +86,7 @@ for enable_analyzer in 0 1; do query_id="$(random_str 10)" $CLICKHOUSE_CLIENT --enable_analyzer "$enable_analyzer" --query_id "$query_id" -q "WITH x AS (SELECT * FROM system.one) SELECT * FROM x FORMAT Null" - $CLICKHOUSE_CLIENT -mn -q " + $CLICKHOUSE_CLIENT -m -q " SYSTEM FLUSH LOGS; SELECT 1 CTE, @@ -103,7 +103,7 @@ for enable_analyzer in 0 1; do query_id="$(random_str 10)" $CLICKHOUSE_CLIENT --enable_analyzer "$enable_analyzer" --query_id "$query_id" -q "WITH x AS (SELECT * FROM system.one) SELECT * FROM x UNION ALL SELECT * FROM x FORMAT Null" - $CLICKHOUSE_CLIENT -mn -q " + $CLICKHOUSE_CLIENT -m -q " SYSTEM FLUSH LOGS; SELECT 1 CTE_Multi, diff --git a/tests/queries/0_stateless/02775_show_columns_called_from_clickhouse.reference b/tests/queries/0_stateless/02775_show_columns_called_from_clickhouse.reference index de0f151db7d..cb905d63ca5 100644 --- a/tests/queries/0_stateless/02775_show_columns_called_from_clickhouse.reference +++ b/tests/queries/0_stateless/02775_show_columns_called_from_clickhouse.reference @@ -44,7 +44,7 @@ nested.col1 Array(String) NO \N nested.col2 Array(UInt32) NO \N nfs Nullable(FixedString(3)) YES \N ns Nullable(String) YES \N -o Object(\'json\') NO \N +o JSON NO \N p Point NO \N pg Polygon NO \N r Ring NO \N diff --git a/tests/queries/0_stateless/02775_show_columns_called_from_clickhouse.sql b/tests/queries/0_stateless/02775_show_columns_called_from_clickhouse.sql index 3bbcbb1a535..dadfa59bf87 100644 --- a/tests/queries/0_stateless/02775_show_columns_called_from_clickhouse.sql +++ b/tests/queries/0_stateless/02775_show_columns_called_from_clickhouse.sql @@ -11,7 +11,7 @@ DROP TABLE IF EXISTS tab; SET allow_suspicious_low_cardinality_types=1; -SET allow_experimental_object_type=1; +SET allow_experimental_json_type=1; CREATE TABLE tab ( diff --git a/tests/queries/0_stateless/02775_show_columns_called_from_mysql.expect b/tests/queries/0_stateless/02775_show_columns_called_from_mysql.expect index 4798a6958c6..2079da9d34a 100755 --- a/tests/queries/0_stateless/02775_show_columns_called_from_mysql.expect +++ b/tests/queries/0_stateless/02775_show_columns_called_from_mysql.expect @@ -33,7 +33,7 @@ send -- "DROP TABLE IF EXISTS tab;\r" expect "Query OK, 0 rows affected" send -- "SET allow_suspicious_low_cardinality_types=1;\r" -send -- "SET allow_experimental_object_type=1;\r" +send -- "SET allow_experimental_json_type=1;\r" send -- " CREATE TABLE tab diff --git a/tests/queries/0_stateless/02786_max_execution_time_leaf.sql b/tests/queries/0_stateless/02786_max_execution_time_leaf.sql index f678c913b46..2e4623f4ac6 100644 --- a/tests/queries/0_stateless/02786_max_execution_time_leaf.sql +++ b/tests/queries/0_stateless/02786_max_execution_time_leaf.sql @@ -1,4 +1,5 @@ -- Tags: no-fasttest +SET max_rows_to_read = 0; SELECT count() FROM cluster('test_cluster_two_shards', view( SELECT * FROM numbers(100000000000) )) SETTINGS max_execution_time_leaf = 1; -- { serverError TIMEOUT_EXCEEDED } -- Can return partial result SELECT count() FROM cluster('test_cluster_two_shards', view( SELECT * FROM numbers(100000000000) )) FORMAT Null SETTINGS max_execution_time_leaf = 1, timeout_overflow_mode_leaf = 'break'; diff --git a/tests/queries/0_stateless/02786_parquet_big_integer_compatibility.reference b/tests/queries/0_stateless/02786_parquet_big_integer_compatibility.reference index 7764974255b..877bb5f390f 100644 --- a/tests/queries/0_stateless/02786_parquet_big_integer_compatibility.reference +++ b/tests/queries/0_stateless/02786_parquet_big_integer_compatibility.reference @@ -1 +1,2 @@ 424242424242424242424242424242424242424242424242424242 +22707864971053448441042714569797161695738549521977760418632926980540162388532 diff --git a/tests/queries/0_stateless/02786_parquet_big_integer_compatibility.sh b/tests/queries/0_stateless/02786_parquet_big_integer_compatibility.sh index 8865b2e7aab..0f590027f19 100755 --- a/tests/queries/0_stateless/02786_parquet_big_integer_compatibility.sh +++ b/tests/queries/0_stateless/02786_parquet_big_integer_compatibility.sh @@ -5,5 +5,8 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh +# This is parsed as text. $CLICKHOUSE_LOCAL -q "select toString(424242424242424242424242424242424242424242424242424242::UInt256) as x format Parquet" | $CLICKHOUSE_LOCAL --input-format=Parquet --structure='x UInt256' -q "select * from table" +# But this is parsed as binary because text length happens to be 32 bytes. Not ideal. +$CLICKHOUSE_LOCAL -q "select toString(42424242424242424242424242424242::UInt256) as x format Parquet" | $CLICKHOUSE_LOCAL --input-format=Parquet --structure='x UInt256' -q "select * from table" diff --git a/tests/queries/0_stateless/02792_drop_projection_lwd.sql b/tests/queries/0_stateless/02792_drop_projection_lwd.sql index dcde7dcc600..dad7f7cd028 100644 --- a/tests/queries/0_stateless/02792_drop_projection_lwd.sql +++ b/tests/queries/0_stateless/02792_drop_projection_lwd.sql @@ -7,7 +7,7 @@ CREATE TABLE t_projections_lwd (a UInt32, b UInt32, PROJECTION p (SELECT * ORDER INSERT INTO t_projections_lwd SELECT number, number FROM numbers(100); -- LWD does not work, as expected -DELETE FROM t_projections_lwd WHERE a = 1; -- { serverError NOT_IMPLEMENTED } +DELETE FROM t_projections_lwd WHERE a = 1; -- { serverError SUPPORT_IS_DISABLED } KILL MUTATION WHERE database = currentDatabase() AND table = 't_projections_lwd' SYNC FORMAT Null; -- drop projection diff --git a/tests/queries/0_stateless/02807_lower_utf8_msan.sql b/tests/queries/0_stateless/02807_lower_utf8_msan.sql index e9eb18bf615..95f224577f7 100644 --- a/tests/queries/0_stateless/02807_lower_utf8_msan.sql +++ b/tests/queries/0_stateless/02807_lower_utf8_msan.sql @@ -1,2 +1,5 @@ +-- Tags: no-fasttest +-- no-fasttest: upper/lowerUTF8 use ICU + SELECT lowerUTF8(arrayJoin(['©--------------------------------------', '©--------------------'])) ORDER BY 1; SELECT upperUTF8(materialize('aaaaАБВГaaaaaaaaaaaaАБВГAAAAaaAA')) FROM numbers(2); diff --git a/tests/queries/0_stateless/02808_custom_disk_with_user_defined_name.sh b/tests/queries/0_stateless/02808_custom_disk_with_user_defined_name.sh index 333bc1bc25d..63fa60bd548 100755 --- a/tests/queries/0_stateless/02808_custom_disk_with_user_defined_name.sh +++ b/tests/queries/0_stateless/02808_custom_disk_with_user_defined_name.sh @@ -13,7 +13,7 @@ DROP TABLE IF EXISTS test; CREATE TABLE test (a Int32, b String) ENGINE = MergeTree() ORDER BY tuple() SETTINGS disk = disk(name = 's3_disk', type = cache, max_size = '100Ki', path = ${CLICKHOUSE_TEST_UNIQUE_NAME}, disk = s3_disk); -""" 2>&1 | grep -q "Disk with name \`s3_disk\` already exist" && echo 'OK' || echo 'FAIL' +""" 2>&1 | grep -q "Disk \`s3_disk\` already exists and is described by the config" && echo 'OK' || echo 'FAIL' disk_name="${CLICKHOUSE_TEST_UNIQUE_NAME}" diff --git a/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh b/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh index 9234c428147..e2afc1d208c 100755 --- a/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh +++ b/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh @@ -6,7 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh query_id="${CLICKHOUSE_DATABASE}_min_max_allocation_size_$RANDOM$RANDOM" -${CLICKHOUSE_CLIENT} --query_id="$query_id" --memory_profiler_sample_min_allocation_size=4096 --memory_profiler_sample_max_allocation_size=8192 --log_queries=1 --max_threads=1 --max_untracked_memory=0 --memory_profiler_sample_probability=1 --query "select randomPrintableASCII(number) from numbers(1000) FORMAT Null" +${CLICKHOUSE_CLIENT} --query_id="$query_id" --memory_profiler_sample_min_allocation_size=4096 --memory_profiler_sample_max_allocation_size=16384 --log_queries=1 --max_threads=1 --max_untracked_memory=0 --memory_profiler_sample_probability=1 --query "select randomPrintableASCII(number) from numbers(1000) FORMAT Null" ${CLICKHOUSE_CLIENT} --query "SYSTEM FLUSH LOGS" @@ -14,4 +14,4 @@ ${CLICKHOUSE_CLIENT} --query "SYSTEM FLUSH LOGS" ${CLICKHOUSE_CLIENT} --query "SELECT countDistinct(abs(size)) > 0 FROM system.trace_log where query_id='$query_id' and trace_type = 'MemorySample'" # show wrong allocations -${CLICKHOUSE_CLIENT} --query "SELECT abs(size) FROM system.trace_log where query_id='$query_id' and trace_type = 'MemorySample' and (abs(size) > 8192 or abs(size) < 4096)" +${CLICKHOUSE_CLIENT} --query "SELECT abs(size) FROM system.trace_log where query_id='$query_id' and trace_type = 'MemorySample' and (abs(size) > 16384 or abs(size) < 4096)" diff --git a/tests/queries/0_stateless/02835_drop_user_during_session.sh b/tests/queries/0_stateless/02835_drop_user_during_session.sh index c32003a2a11..01e4f9a5c2b 100755 --- a/tests/queries/0_stateless/02835_drop_user_during_session.sh +++ b/tests/queries/0_stateless/02835_drop_user_during_session.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-debug +# Tags: no-debug, no-random-settings, no-random-merge-tree-settings CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02841_local_assert.sh b/tests/queries/0_stateless/02841_local_assert.sh index a167c09da1f..dc49007b0f6 100755 --- a/tests/queries/0_stateless/02841_local_assert.sh +++ b/tests/queries/0_stateless/02841_local_assert.sh @@ -7,12 +7,12 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh echo "create table test (x UInt64) engine=Memory; -insert into test from infile 'data'; -- {clientError BAD_ARGUMENTS}" | $CLICKHOUSE_LOCAL -nm +insert into test from infile 'data'; -- {clientError BAD_ARGUMENTS}" | $CLICKHOUSE_LOCAL -m echo "create table test (x UInt64) engine=Memory; -insert into test from infile 'data';" | $CLICKHOUSE_LOCAL -nm --ignore-error +insert into test from infile 'data';" | $CLICKHOUSE_LOCAL -m --ignore-error echo "create table test (x UInt64) engine=Memory; insert into test from infile 'data'; -- {clientError BAD_ARGUMENTS} -select 1" | $CLICKHOUSE_LOCAL -nm +select 1" | $CLICKHOUSE_LOCAL -m diff --git a/tests/queries/0_stateless/02841_not_ready_set_bug.sh b/tests/queries/0_stateless/02841_not_ready_set_bug.sh index 556e2f52de2..d5a2d034014 100755 --- a/tests/queries/0_stateless/02841_not_ready_set_bug.sh +++ b/tests/queries/0_stateless/02841_not_ready_set_bug.sh @@ -13,7 +13,7 @@ $CLICKHOUSE_CLIENT -q "SELECT * FROM system.tables WHERE 1 in (SELECT number fro $CLICKHOUSE_CLIENT -q "SELECT xor(1, 0) FROM system.parts WHERE 1 IN (SELECT 1) FORMAT Null" # (Not all of these tests are effective because some of these tables are empty.) -$CLICKHOUSE_CLIENT -nq " +$CLICKHOUSE_CLIENT -q " select * from system.columns where table in (select '123'); select * from system.replicas where database in (select '123'); select * from system.data_skipping_indices where database in (select '123'); @@ -23,7 +23,7 @@ $CLICKHOUSE_CLIENT -nq " select * from system.replication_queue where database in (select '123'); select * from system.distribution_queue where database in (select '123'); " -$CLICKHOUSE_CLIENT -nq " +$CLICKHOUSE_CLIENT -q " create table a (x Int8) engine MergeTree order by x; insert into a values (1); select * from mergeTreeIndex(currentDatabase(), 'a') where part_name in (select '123'); diff --git a/tests/queries/0_stateless/02841_parquet_filter_pushdown.reference b/tests/queries/0_stateless/02841_parquet_filter_pushdown.reference index 4adf418bcc7..8003b9cb626 100644 --- a/tests/queries/0_stateless/02841_parquet_filter_pushdown.reference +++ b/tests/queries/0_stateless/02841_parquet_filter_pushdown.reference @@ -71,3 +71,5 @@ d256 Nullable(Decimal(76, 40)) 500 244750 500 244750 500 244750 +42 +100 diff --git a/tests/queries/0_stateless/02841_parquet_filter_pushdown.sql b/tests/queries/0_stateless/02841_parquet_filter_pushdown.sql index 950485d53f0..52caee50b32 100644 --- a/tests/queries/0_stateless/02841_parquet_filter_pushdown.sql +++ b/tests/queries/0_stateless/02841_parquet_filter_pushdown.sql @@ -131,3 +131,9 @@ select count(), sum(number) from file('02841.parquet', Parquet, 'number UInt64, select count(), sum(number) from file('02841.parquet') where indexHint(string_or_null == ''); -- quirk with infinities select count(), sum(number) from file('02841.parquet', Parquet, 'number UInt64, string_or_null String') where indexHint(string_or_null == ''); select count(), sum(number) from file('02841.parquet', Parquet, 'number UInt64, nEgAtIvE_oR_nUlL Int64') where indexHint(nEgAtIvE_oR_nUlL > -50) settings input_format_parquet_case_insensitive_column_matching = 1; + +-- Bad type conversions. +insert into function file('02841.parquet') select 42 as x; +select * from file('02841.parquet', Parquet, 'x Nullable(String)') where x not in (1); +insert into function file('t.parquet', Parquet, 'x String') values ('1'), ('100'), ('2'); +select * from file('t.parquet', Parquet, 'x Int64') where x >= 3; diff --git a/tests/queries/0_stateless/02841_parquet_filter_pushdown_bug.reference b/tests/queries/0_stateless/02841_parquet_filter_pushdown_bug.reference new file mode 100644 index 00000000000..6ed63af507a --- /dev/null +++ b/tests/queries/0_stateless/02841_parquet_filter_pushdown_bug.reference @@ -0,0 +1 @@ +[1,2] diff --git a/tests/queries/0_stateless/02841_parquet_filter_pushdown_bug.sh b/tests/queries/0_stateless/02841_parquet_filter_pushdown_bug.sh new file mode 100755 index 00000000000..58eb207b6e6 --- /dev/null +++ b/tests/queries/0_stateless/02841_parquet_filter_pushdown_bug.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select f from file('$CURDIR/data_parquet/68131.parquet', Parquet, 'f Array(Int32)')" \ No newline at end of file diff --git a/tests/queries/0_stateless/02844_subquery_timeout_with_break.sql b/tests/queries/0_stateless/02844_subquery_timeout_with_break.sql index 511ed0c59de..00b527a9378 100644 --- a/tests/queries/0_stateless/02844_subquery_timeout_with_break.sql +++ b/tests/queries/0_stateless/02844_subquery_timeout_with_break.sql @@ -4,7 +4,7 @@ CREATE TABLE t (key UInt64, value UInt64, INDEX value_idx value TYPE bloom_filte INSERT INTO t SELECT number, rand()%1000 FROM numbers(10000); SET timeout_overflow_mode='break'; -SET max_execution_time=0.1; +SET max_execution_time=0.1, max_rows_to_read=0; SELECT * FROM t WHERE value IN (SELECT number FROM numbers(1000000000)); DROP TABLE t; diff --git a/tests/queries/0_stateless/02864_statistics_bugs.reference b/tests/queries/0_stateless/02864_statistics_bugs.reference new file mode 100644 index 00000000000..a7eeae9def6 --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_bugs.reference @@ -0,0 +1,3 @@ +10 +11 +0 diff --git a/tests/queries/0_stateless/02864_statistics_bugs.sql b/tests/queries/0_stateless/02864_statistics_bugs.sql new file mode 100644 index 00000000000..01bbe221b0f --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_bugs.sql @@ -0,0 +1,27 @@ +SET allow_experimental_statistics = 1; +SET allow_statistics_optimize = 1; +SET mutations_sync = 1; + +DROP TABLE IF EXISTS bug_67742; +CREATE TABLE bug_67742 (a Float64 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); +INSERT INTO bug_67742 SELECT number FROM system.numbers LIMIT 10000; +SELECT count(*) FROM bug_67742 WHERE a < '10'; +DROP TABLE bug_67742; + +DROP TABLE IF EXISTS bug_67742; +CREATE TABLE bug_67742 (a Int32 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); +INSERT INTO bug_67742 SELECT number FROM system.numbers LIMIT 10000; +SELECT count(*) FROM bug_67742 WHERE a < '10.5'; -- { serverError TYPE_MISMATCH } +DROP TABLE bug_67742; + +DROP TABLE IF EXISTS bug_67742; +CREATE TABLE bug_67742 (a Int32 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); +INSERT INTO bug_67742 SELECT number FROM system.numbers LIMIT 10000; +SELECT count(*) FROM bug_67742 WHERE a < 10.5; +DROP TABLE bug_67742; + +DROP TABLE IF EXISTS bug_67742; +CREATE TABLE bug_67742 (a Int16 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); +INSERT INTO bug_67742 SELECT number FROM system.numbers LIMIT 10000; +SELECT count(*) FROM bug_67742 WHERE a < '9999999999999999999999999'; +DROP TABLE bug_67742; diff --git a/tests/queries/0_stateless/02864_statistics_count_min_sketch.reference b/tests/queries/0_stateless/02864_statistics_count_min_sketch.reference deleted file mode 100644 index 02c41656a36..00000000000 --- a/tests/queries/0_stateless/02864_statistics_count_min_sketch.reference +++ /dev/null @@ -1,14 +0,0 @@ -CREATE TABLE default.tab\n(\n `a` String,\n `b` UInt64,\n `c` Int64,\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -Test statistics count_min: - Prewhere info - Prewhere filter - Prewhere filter column: and(equals(a, \'0\'), equals(b, 0), equals(c, 0)) (removed) -Test statistics multi-types: - Prewhere info - Prewhere filter - Prewhere filter column: and(equals(a, \'0\'), less(c, -90), greater(b, 900)) (removed) - Prewhere info - Prewhere filter - Prewhere filter column: and(equals(a, \'10000\'), equals(b, 0), less(c, 0)) (removed) -Test LowCardinality and Nullable data type: -tab2 diff --git a/tests/queries/0_stateless/02864_statistics_count_min_sketch.sql b/tests/queries/0_stateless/02864_statistics_count_min_sketch.sql deleted file mode 100644 index c730aa7b4a7..00000000000 --- a/tests/queries/0_stateless/02864_statistics_count_min_sketch.sql +++ /dev/null @@ -1,70 +0,0 @@ --- Tags: no-fasttest - -DROP TABLE IF EXISTS tab SYNC; - -SET allow_experimental_statistics = 1; -SET allow_statistics_optimize = 1; -SET allow_suspicious_low_cardinality_types=1; -SET mutations_sync = 2; - -CREATE TABLE tab -( - a String, - b UInt64, - c Int64, - pk String, -) Engine = MergeTree() ORDER BY pk -SETTINGS min_bytes_for_wide_part = 0; - -SHOW CREATE TABLE tab; - -INSERT INTO tab select toString(number % 10000), number % 1000, -(number % 100), generateUUIDv4() FROM system.numbers LIMIT 10000; - -SELECT 'Test statistics count_min:'; - -ALTER TABLE tab ADD STATISTICS a TYPE count_min; -ALTER TABLE tab ADD STATISTICS b TYPE count_min; -ALTER TABLE tab ADD STATISTICS c TYPE count_min; -ALTER TABLE tab MATERIALIZE STATISTICS a, b, c; - -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '') -FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c = 0/*100*/ and b = 0/*10*/ and a = '0'/*1*/) xx -WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; - -ALTER TABLE tab DROP STATISTICS a, b, c; - - -SELECT 'Test statistics multi-types:'; - -ALTER TABLE tab ADD STATISTICS a TYPE count_min; -ALTER TABLE tab ADD STATISTICS b TYPE count_min, uniq, tdigest; -ALTER TABLE tab ADD STATISTICS c TYPE count_min, uniq, tdigest; -ALTER TABLE tab MATERIALIZE STATISTICS a, b, c; - -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '') -FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < -90/*900*/ and b > 900/*990*/ and a = '0'/*1*/) -WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; - -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '') -FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < 0/*9900*/ and b = 0/*10*/ and a = '10000'/*0*/) -WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; - -ALTER TABLE tab DROP STATISTICS a, b, c; - -DROP TABLE IF EXISTS tab SYNC; - - -SELECT 'Test LowCardinality and Nullable data type:'; -DROP TABLE IF EXISTS tab2 SYNC; -SET allow_suspicious_low_cardinality_types=1; -CREATE TABLE tab2 -( - a LowCardinality(Int64) STATISTICS(count_min), - b Nullable(Int64) STATISTICS(count_min), - c LowCardinality(Nullable(Int64)) STATISTICS(count_min), - pk String, -) Engine = MergeTree() ORDER BY pk; - -select name from system.tables where name = 'tab2' and database = currentDatabase(); - -DROP TABLE IF EXISTS tab2 SYNC; diff --git a/tests/queries/0_stateless/02864_statistics_ddl.reference b/tests/queries/0_stateless/02864_statistics_ddl.reference index a7ff5caa0b0..0e453b0ee8a 100644 --- a/tests/queries/0_stateless/02864_statistics_ddl.reference +++ b/tests/queries/0_stateless/02864_statistics_ddl.reference @@ -1,31 +1,6 @@ -CREATE TABLE default.tab\n(\n `a` Float64 STATISTICS(tdigest),\n `b` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -After insert - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) -10 -0 -After drop statistic - Prewhere info - Prewhere filter - Prewhere filter column: and(less(b, 10), less(a, 10)) (removed) -10 -CREATE TABLE default.tab\n(\n `a` Float64,\n `b` Int64,\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -After add statistic -CREATE TABLE default.tab\n(\n `a` Float64 STATISTICS(tdigest),\n `b` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -After materialize statistic - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) -20 -After merge - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) -20 -CREATE TABLE default.tab\n(\n `a` Float64 STATISTICS(tdigest),\n `c` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -After rename - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), less(c, 10)) (removed) -20 +CREATE TABLE default.tab\n(\n `f64` Float64,\n `f64_tdigest` Float64 STATISTICS(tdigest),\n `f32` Float32,\n `s` String,\n `a` Array(Float64)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.tab\n(\n `f64` Float64 STATISTICS(tdigest, uniq),\n `f64_tdigest` Float64 STATISTICS(tdigest),\n `f32` Float32 STATISTICS(tdigest, uniq),\n `s` String,\n `a` Array(Float64)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.tab\n(\n `f64` Float64 STATISTICS(tdigest, uniq),\n `f64_tdigest` Float64 STATISTICS(tdigest),\n `f32` Float32 STATISTICS(tdigest, uniq),\n `s` String,\n `a` Array(Float64)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.tab\n(\n `f64` Float64 STATISTICS(tdigest, uniq),\n `f64_tdigest` Float64 STATISTICS(tdigest),\n `f32` Float32 STATISTICS(tdigest, uniq),\n `s` String,\n `a` Array(Float64)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.tab\n(\n `f64` Float64 STATISTICS(tdigest, uniq),\n `f64_tdigest` Float64 STATISTICS(tdigest),\n `f32` Float32 STATISTICS(tdigest, uniq),\n `s` String,\n `a` Array(Float64)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.tab\n(\n `f64` Float64,\n `f64_tdigest` Float64 STATISTICS(tdigest),\n `f32` Float32,\n `s` String,\n `a` Array(Float64)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/02864_statistics_ddl.sql b/tests/queries/0_stateless/02864_statistics_ddl.sql index fe612efe2ac..32b56a842b7 100644 --- a/tests/queries/0_stateless/02864_statistics_ddl.sql +++ b/tests/queries/0_stateless/02864_statistics_ddl.sql @@ -1,59 +1,195 @@ --- Tests that various DDL statements create/drop/materialize statistics +-- Tags: no-fasttest +-- no-fasttest: 'count_min' sketches need a 3rd party library + +-- Tests that DDL statements which create / drop / materialize statistics + +SET mutations_sync = 1; DROP TABLE IF EXISTS tab; +-- Error case: Can't create statistics when allow_experimental_statistics = 0 +CREATE TABLE tab (col Float64 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY } + SET allow_experimental_statistics = 1; -SET allow_statistics_optimize = 1; + +-- Error case: Unknown statistics types are rejected +CREATE TABLE tab (col Float64 STATISTICS(no_statistics_type)) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY } + +-- Error case: The same statistics type can't exist more than once on a column +CREATE TABLE tab (col Float64 STATISTICS(tdigest, tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY } + +SET allow_suspicious_low_cardinality_types = 1; + +-- Statistics can only be created on columns of specific data types (depending on the statistics kind), (*) + +-- tdigest requires data_type.isValueRepresentedByInteger +-- These types work: +CREATE TABLE tab (col UInt8 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col UInt256 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Float32 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Decimal32(3) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Date STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Date32 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col DateTime STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col DateTime64 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Enum('hello', 'world') STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col IPv4 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Nullable(UInt8) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col LowCardinality(UInt8) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col LowCardinality(Nullable(UInt8)) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +-- These types don't work: +CREATE TABLE tab (col String STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col FixedString(1) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col Array(Float64) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col Tuple(Float64, Float64) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col Map(UInt64, UInt64) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col UUID STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col IPv6 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } + +-- uniq requires data_type.isValueRepresentedByInteger +-- These types work: +CREATE TABLE tab (col UInt8 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col UInt256 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Float32 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Decimal32(3) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Date STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Date32 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col DateTime STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col DateTime64 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Enum('hello', 'world') STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col IPv4 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Nullable(UInt8) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col LowCardinality(UInt8) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col LowCardinality(Nullable(UInt8)) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +-- These types don't work: +CREATE TABLE tab (col String STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col FixedString(1) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col Array(Float64) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col Tuple(Float64, Float64) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col Map(UInt64, UInt64) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col UUID STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col IPv6 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } + +-- count_min requires data_type.isValueRepresentedByInteger or data_type = (Fixed)String +-- These types work: +CREATE TABLE tab (col UInt8 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col UInt256 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Float32 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Decimal32(3) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Date STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Date32 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col DateTime STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col DateTime64 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Enum('hello', 'world') STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col IPv4 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Nullable(UInt8) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col LowCardinality(UInt8) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col LowCardinality(Nullable(UInt8)) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col String STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col FixedString(1) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +-- These types don't work: +CREATE TABLE tab (col Array(Float64) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col Tuple(Float64, Float64) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col Map(UInt64, UInt64) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col UUID STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col IPv6 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } + +-- CREATE TABLE was easy, ALTER is more fun CREATE TABLE tab ( - a Float64 STATISTICS(tdigest), - b Int64 STATISTICS(tdigest), - pk String, -) Engine = MergeTree() ORDER BY pk -SETTINGS min_bytes_for_wide_part = 0; + f64 Float64, + f64_tdigest Float64 STATISTICS(tdigest), + f32 Float32, + s String, + a Array(Float64) +) +Engine = MergeTree() +ORDER BY tuple(); +-- Error case: Unknown statistics types are rejected +-- (relevant for ADD and MODIFY) +ALTER TABLE tab ADD STATISTICS f64 TYPE no_statistics_type; -- { serverError INCORRECT_QUERY } +ALTER TABLE tab ADD STATISTICS IF NOT EXISTS f64 TYPE no_statistics_type; -- { serverError INCORRECT_QUERY } +ALTER TABLE tab MODIFY STATISTICS f64 TYPE no_statistics_type; -- { serverError INCORRECT_QUERY } +-- for some reason, ALTER TABLE tab MODIFY STATISTICS IF EXISTS is not supported + +-- Error case: The same statistics type can't exist more than once on a column +-- (relevant for ADD and MODIFY) +-- Create the same statistics object twice +ALTER TABLE tab ADD STATISTICS f64 TYPE tdigest, tdigest; -- { serverError INCORRECT_QUERY } +ALTER TABLE tab ADD STATISTICS IF NOT EXISTS f64 TYPE tdigest, tdigest; -- { serverError INCORRECT_QUERY } +ALTER TABLE tab MODIFY STATISTICS f64 TYPE tdigest, tdigest; -- { serverError INCORRECT_QUERY } +-- Create an statistics which exists already +ALTER TABLE tab ADD STATISTICS f64_tdigest TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab ADD STATISTICS IF NOT EXISTS f64_tdigest TYPE tdigest; -- no-op +ALTER TABLE tab MODIFY STATISTICS f64_tdigest TYPE tdigest; -- no-op + +-- Error case: Column does not exist +-- (relevant for ADD, MODIFY, DROP, CLEAR, and MATERIALIZE) +-- Note that the results are unfortunately quite inconsistent ... +ALTER TABLE tab ADD STATISTICS no_such_column TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab ADD STATISTICS IF NOT EXISTS no_such_column TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab MODIFY STATISTICS no_such_column TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab DROP STATISTICS no_such_column; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab DROP STATISTICS IF EXISTS no_such_column; -- no-op +ALTER TABLE tab CLEAR STATISTICS no_such_column; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab CLEAR STATISTICS IF EXISTS no_such_column; -- no-op +ALTER TABLE tab MATERIALIZE STATISTICS no_such_column; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab MATERIALIZE STATISTICS IF EXISTS no_such_column; -- { serverError ILLEGAL_STATISTICS } + +-- Error case: Column exists but has no statistics +-- (relevant for MODIFY, DROP, CLEAR, and MATERIALIZE) +-- Note that the results are unfortunately quite inconsistent ... +ALTER TABLE tab MODIFY STATISTICS s TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab DROP STATISTICS s; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab DROP STATISTICS IF EXISTS s; -- no-op +ALTER TABLE tab CLEAR STATISTICS s; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab CLEAR STATISTICS IF EXISTS s; -- no-op +ALTER TABLE tab MATERIALIZE STATISTICS s; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab MATERIALIZE STATISTICS IF EXISTS s; -- { serverError ILLEGAL_STATISTICS } + +-- We don't check systematically that that statistics can only be created via ALTER ADD STATISTICS on columns of specific data types (the +-- internal type validation code is tested already above, (*)). Only do a rudimentary check for each statistics type with a data type that +-- works and one that doesn't work. +-- tdigest +-- Works: +ALTER TABLE tab ADD STATISTICS f64 TYPE tdigest; ALTER TABLE tab DROP STATISTICS f64; +ALTER TABLE tab MODIFY STATISTICS f64 TYPE tdigest; ALTER TABLE tab DROP STATISTICS f64; +-- Doesn't work: +ALTER TABLE tab ADD STATISTICS a TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab MODIFY STATISTICS a TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } +-- uniq +-- Works: +ALTER TABLE tab ADD STATISTICS f64 TYPE uniq; ALTER TABLE tab DROP STATISTICS f64; +ALTER TABLE tab MODIFY STATISTICS f64 TYPE count_min; ALTER TABLE tab DROP STATISTICS f64; +-- Doesn't work: +ALTER TABLE tab ADD STATISTICS a TYPE uniq; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab MODIFY STATISTICS a TYPE uniq; -- { serverError ILLEGAL_STATISTICS } +-- count_min +-- Works: +ALTER TABLE tab ADD STATISTICS f64 TYPE count_min; ALTER TABLE tab DROP STATISTICS f64; +ALTER TABLE tab MODIFY STATISTICS f64 TYPE count_min; ALTER TABLE tab DROP STATISTICS f64; +-- Doesn't work: +ALTER TABLE tab ADD STATISTICS a TYPE count_min; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab MODIFY STATISTICS a TYPE count_min; -- { serverError ILLEGAL_STATISTICS } + +-- Any data type changes on columns with statistics are disallowed, for simplicity even if the new data type is compatible with all existing +-- statistics objects (e.g. tdigest can be created on Float64 and UInt64) +ALTER TABLE tab MODIFY COLUMN f64_tdigest UInt64; -- { serverError ALTER_OF_COLUMN_IS_FORBIDDEN } + +-- Finally, do a full-circle test of a good case. Print table definition after each step. +-- Intentionally specifying _two_ columns and _two_ statistics types to have that also tested. +SHOW CREATE TABLE tab; +ALTER TABLE tab ADD STATISTICS f64, f32 TYPE tdigest, uniq; +SHOW CREATE TABLE tab; +ALTER TABLE tab MODIFY STATISTICS f64, f32 TYPE tdigest, uniq; +SHOW CREATE TABLE tab; +ALTER TABLE tab CLEAR STATISTICS f64, f32; +SHOW CREATE TABLE tab; +ALTER TABLE tab MATERIALIZE STATISTICS f64, f32; +SHOW CREATE TABLE tab; +ALTER TABLE tab DROP STATISTICS f64, f32; SHOW CREATE TABLE tab; -INSERT INTO tab select number, -number, generateUUIDv4() FROM system.numbers LIMIT 10000; - -SELECT 'After insert'; -SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT count(*) FROM tab WHERE b < 10 and a < 10; -SELECT count(*) FROM tab WHERE b < NULL and a < '10'; - -ALTER TABLE tab DROP STATISTICS a, b; - -SELECT 'After drop statistic'; -SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT count(*) FROM tab WHERE b < 10 and a < 10; - -SHOW CREATE TABLE tab; - -ALTER TABLE tab ADD STATISTICS a, b TYPE tdigest; - -SELECT 'After add statistic'; - -SHOW CREATE TABLE tab; - -ALTER TABLE tab MATERIALIZE STATISTICS a, b; -INSERT INTO tab select number, -number, generateUUIDv4() FROM system.numbers LIMIT 10000; - -SELECT 'After materialize statistic'; -SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT count(*) FROM tab WHERE b < 10 and a < 10; - -OPTIMIZE TABLE tab FINAL; - -SELECT 'After merge'; -SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT count(*) FROM tab WHERE b < 10 and a < 10; - -ALTER TABLE tab RENAME COLUMN b TO c; -SHOW CREATE TABLE tab; - -SELECT 'After rename'; -SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT count(*) FROM tab WHERE c < 10 and a < 10; - -DROP TABLE IF EXISTS tab; +DROP TABLE tab; diff --git a/tests/queries/0_stateless/02864_statistics_delayed_materialization_in_merge.reference b/tests/queries/0_stateless/02864_statistics_delayed_materialization_in_merge.reference new file mode 100644 index 00000000000..eb5e685597c --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_delayed_materialization_in_merge.reference @@ -0,0 +1,12 @@ +After insert + Prewhere info + Prewhere filter + Prewhere filter column: and(less(b, 10_UInt8), less(a, 10_UInt8)) (removed) +After merge + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10_UInt8), less(b, 10_UInt8)) (removed) +After truncate, insert, and materialize + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10_UInt8), less(b, 10_UInt8)) (removed) diff --git a/tests/queries/0_stateless/02864_statistics_delayed_materialization_in_merge.sql b/tests/queries/0_stateless/02864_statistics_delayed_materialization_in_merge.sql new file mode 100644 index 00000000000..d469a4c2036 --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_delayed_materialization_in_merge.sql @@ -0,0 +1,36 @@ +-- Tests delayed materialization of statistics in merge instead of during insert (setting 'materialize_statistics_on_insert = 0'). +-- (The concrete statistics type, column data type and predicate type don't matter) + +-- Checks by the predicate evaluation order in EXPLAIN. This is quite fragile, a better approach would be helpful (maybe 'send_logs_level'?) + +DROP TABLE IF EXISTS tab; + +SET allow_experimental_statistics = 1; +SET allow_statistics_optimize = 1; +SET enable_analyzer = 1; + +SET materialize_statistics_on_insert = 0; + +CREATE TABLE tab +( + a Int64 STATISTICS(tdigest), + b Int16 STATISTICS(tdigest), +) ENGINE = MergeTree() ORDER BY tuple() +SETTINGS min_bytes_for_wide_part = 0, enable_vertical_merge_algorithm = 0; -- TODO: there is a bug in vertical merge with statistics. + +INSERT INTO tab SELECT number, -number FROM system.numbers LIMIT 10000; +SELECT 'After insert'; +SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks b first, then a (statistics not used) + +OPTIMIZE TABLE tab FINAL; +SELECT 'After merge'; +SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks a first, then b (statistics used) + +TRUNCATE TABLE tab; +SET mutations_sync = 2; +INSERT INTO tab SELECT number, -number FROM system.numbers LIMIT 10000; +ALTER TABLE tab MATERIALIZE STATISTICS a, b; +SELECT 'After truncate, insert, and materialize'; +SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks a first, then b (statistics used) + +DROP TABLE tab; diff --git a/tests/queries/0_stateless/02864_statistics_exception.sql b/tests/queries/0_stateless/02864_statistics_exception.sql deleted file mode 100644 index 289ffee6600..00000000000 --- a/tests/queries/0_stateless/02864_statistics_exception.sql +++ /dev/null @@ -1,55 +0,0 @@ --- Tests creating/dropping/materializing statistics produces the right exceptions. - -DROP TABLE IF EXISTS tab; - --- Can't create statistics when allow_experimental_statistics = 0 -CREATE TABLE tab -( - a Float64 STATISTICS(tdigest) -) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY } - -SET allow_experimental_statistics = 1; - --- The same type of statistics can't exist more than once on a column -CREATE TABLE tab -( - a Float64 STATISTICS(tdigest, tdigest) -) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY } - --- Unknown statistics types are rejected -CREATE TABLE tab -( - a Float64 STATISTICS(no_statistics_type) -) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY } - --- tDigest statistics can only be created on numeric columns -CREATE TABLE tab -( - a String STATISTICS(tdigest), -) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } - -CREATE TABLE tab -( - a Float64, - b String -) Engine = MergeTree() ORDER BY tuple(); - -ALTER TABLE tab ADD STATISTICS a TYPE no_statistics_type; -- { serverError INCORRECT_QUERY } -ALTER TABLE tab ADD STATISTICS a TYPE tdigest; -ALTER TABLE tab ADD STATISTICS IF NOT EXISTS a TYPE tdigest; -ALTER TABLE tab ADD STATISTICS a TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE tab MODIFY STATISTICS a TYPE tdigest; --- Statistics can be created only on integer columns -ALTER TABLE tab ADD STATISTICS b TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE tab DROP STATISTICS b; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE tab DROP STATISTICS a; -ALTER TABLE tab DROP STATISTICS IF EXISTS a; -ALTER TABLE tab CLEAR STATISTICS a; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE tab CLEAR STATISTICS IF EXISTS a; -ALTER TABLE tab MATERIALIZE STATISTICS b; -- { serverError ILLEGAL_STATISTICS } - -ALTER TABLE tab ADD STATISTICS a TYPE tdigest; -ALTER TABLE tab MODIFY COLUMN a Float64 TTL toDateTime(b) + INTERVAL 1 MONTH; -ALTER TABLE tab MODIFY COLUMN a Int64; -- { serverError ALTER_OF_COLUMN_IS_FORBIDDEN } - -DROP TABLE tab; diff --git a/tests/queries/0_stateless/02864_statistics_materialize_in_merge.reference b/tests/queries/0_stateless/02864_statistics_materialize_in_merge.reference deleted file mode 100644 index 5e969cf41cb..00000000000 --- a/tests/queries/0_stateless/02864_statistics_materialize_in_merge.reference +++ /dev/null @@ -1,10 +0,0 @@ -10 -10 -10 -statistics not used Condition less(b, 10_UInt8) moved to PREWHERE -statistics not used Condition less(a, 10_UInt8) moved to PREWHERE -statistics used after merge Condition less(a, 10_UInt8) moved to PREWHERE -statistics used after merge Condition less(b, 10_UInt8) moved to PREWHERE -statistics used after materialize Condition less(a, 10_UInt8) moved to PREWHERE -statistics used after materialize Condition less(b, 10_UInt8) moved to PREWHERE -2 0 diff --git a/tests/queries/0_stateless/02864_statistics_materialize_in_merge.sql b/tests/queries/0_stateless/02864_statistics_materialize_in_merge.sql deleted file mode 100644 index 6606cff263f..00000000000 --- a/tests/queries/0_stateless/02864_statistics_materialize_in_merge.sql +++ /dev/null @@ -1,52 +0,0 @@ --- Tests delayed materialization of statistics in merge instead of during insert (setting 'materialize_statistics_on_insert = 0'). - -DROP TABLE IF EXISTS tab; - -SET enable_analyzer = 1; -SET allow_experimental_statistics = 1; -SET allow_statistics_optimize = 1; - -SET materialize_statistics_on_insert = 0; - -CREATE TABLE tab -( - a Int64 STATISTICS(tdigest), - b Int16 STATISTICS(tdigest), -) ENGINE = MergeTree() ORDER BY tuple() -SETTINGS min_bytes_for_wide_part = 0, enable_vertical_merge_algorithm = 0; -- TODO: there is a bug in vertical merge with statistics. - -INSERT INTO tab SELECT number, -number FROM system.numbers LIMIT 10000; - -SELECT count(*) FROM tab WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics not used'; - -OPTIMIZE TABLE tab FINAL; - -SELECT count(*) FROM tab WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics used after merge'; - -TRUNCATE TABLE tab; -SET mutations_sync = 2; - -INSERT INTO tab SELECT number, -number FROM system.numbers LIMIT 10000; -ALTER TABLE tab MATERIALIZE STATISTICS a, b; - -SELECT count(*) FROM tab WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics used after materialize'; - -DROP TABLE tab; - -SYSTEM FLUSH LOGS; - -SELECT log_comment, message FROM system.text_log JOIN -( - SELECT Settings['log_comment'] AS log_comment, query_id FROM system.query_log - WHERE current_database = currentDatabase() - AND query LIKE 'SELECT count(*) FROM tab%' - AND type = 'QueryFinish' -) AS query_log USING (query_id) -WHERE message LIKE '%moved to PREWHERE%' -ORDER BY event_time_microseconds; - -SELECT count(), sum(ProfileEvents['MergeTreeDataWriterStatisticsCalculationMicroseconds']) -FROM system.query_log -WHERE current_database = currentDatabase() - AND query LIKE 'INSERT INTO tab SELECT%' - AND type = 'QueryFinish'; diff --git a/tests/queries/0_stateless/02864_statistics_predicates.reference b/tests/queries/0_stateless/02864_statistics_predicates.reference new file mode 100644 index 00000000000..ffbd7269e05 --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_predicates.reference @@ -0,0 +1,98 @@ +u64 and = +10 +10 +10 +10 +0 +0 +0 +0 +10 +10 +10 +10 +u64 and < +70 +70 +70 +70 +80 +80 +80 +80 +70 +70 +70 +70 +f64 and = +10 +10 +10 +10 +0 +0 +0 +0 +10 +10 +10 +10 +0 +0 +0 +0 +f64 and < +70 +70 +70 +70 +80 +80 +80 +80 +70 +70 +70 +70 +80 +80 +80 +80 +dt and = +0 +0 +0 +0 +10 +10 +10 +10 +dt and < +10000 +10000 +10000 +10000 +70 +70 +70 +70 +b and = +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +0 +0 +0 +0 +s and = +10 +10 diff --git a/tests/queries/0_stateless/02864_statistics_predicates.sql b/tests/queries/0_stateless/02864_statistics_predicates.sql new file mode 100644 index 00000000000..779116cf19a --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_predicates.sql @@ -0,0 +1,214 @@ +-- Tags: no-fasttest +-- no-fasttest: 'count_min' sketches need a 3rd party library + +-- Tests the cross product of all predicates with all right-hand sides on all data types and all statistics types. + +SET allow_experimental_statistics = 1; +SET allow_statistics_optimize = 1; + +DROP TABLE IF EXISTS tab; + +CREATE TABLE tab +( + u64 UInt64, + u64_tdigest UInt64 STATISTICS(tdigest), + u64_count_min UInt64 STATISTICS(count_min), + u64_uniq UInt64 STATISTICS(uniq), + f64 Float64, + f64_tdigest Float64 STATISTICS(tdigest), + f64_count_min Float64 STATISTICS(count_min), + f64_uniq Float64 STATISTICS(uniq), + dt DateTime, + dt_tdigest DateTime STATISTICS(tdigest), + dt_count_min DateTime STATISTICS(count_min), + dt_uniq DateTime STATISTICS(uniq), + b Bool, + b_tdigest Bool STATISTICS(tdigest), + b_count_min Bool STATISTICS(count_min), + b_uniq Bool STATISTICS(uniq), + s String, + -- s_tdigest String STATISTICS(tdigest), -- not supported by tdigest + s_count_min String STATISTICS(count_min) + -- s_uniq String STATISTICS(uniq), -- not supported by uniq +) Engine = MergeTree() ORDER BY tuple() +SETTINGS min_bytes_for_wide_part = 0; + +INSERT INTO tab +-- SELECT number % 10000, number % 1000, -(number % 100) FROM system.numbers LIMIT 10000; +SELECT number % 1000, + number % 1000, + number % 1000, + number % 1000, + number % 1000, + number % 1000, + number % 1000, + number % 1000, + number % 1000, + number % 1000, + number % 1000, + number % 1000, + number % 2, + number % 2, + number % 2, + number % 2, + toString(number % 1000), + toString(number % 1000) +FROM system.numbers LIMIT 10000; + +-- u64 ---------------------------------------------------- + +SELECT 'u64 and ='; + +SELECT count(*) FROM tab WHERE u64 = 7; +SELECT count(*) FROM tab WHERE u64_tdigest = 7; +SELECT count(*) FROM tab WHERE u64_count_min = 7; +SELECT count(*) FROM tab WHERE u64_uniq = 7; + +SELECT count(*) FROM tab WHERE u64 = 7.7; +SELECT count(*) FROM tab WHERE u64_tdigest = 7.7; +SELECT count(*) FROM tab WHERE u64_count_min = 7.7; +SELECT count(*) FROM tab WHERE u64_uniq = 7.7; + +SELECT count(*) FROM tab WHERE u64 = '7'; +SELECT count(*) FROM tab WHERE u64_tdigest = '7'; +SELECT count(*) FROM tab WHERE u64_count_min = '7'; +SELECT count(*) FROM tab WHERE u64_uniq = '7'; + +SELECT count(*) FROM tab WHERE u64 = '7.7'; -- { serverError TYPE_MISMATCH } +SELECT count(*) FROM tab WHERE u64_tdigest = '7.7'; -- { serverError TYPE_MISMATCH } +SELECT count(*) FROM tab WHERE u64_count_min = '7.7'; -- { serverError TYPE_MISMATCH } +SELECT count(*) FROM tab WHERE u64_uniq = '7.7'; -- { serverError TYPE_MISMATCH } + +SELECT 'u64 and <'; + +SELECT count(*) FROM tab WHERE u64 < 7; +SELECT count(*) FROM tab WHERE u64_tdigest < 7; +SELECT count(*) FROM tab WHERE u64_count_min < 7; +SELECT count(*) FROM tab WHERE u64_uniq < 7; + +SELECT count(*) FROM tab WHERE u64 < 7.7; +SELECT count(*) FROM tab WHERE u64_tdigest < 7.7; +SELECT count(*) FROM tab WHERE u64_count_min < 7.7; +SELECT count(*) FROM tab WHERE u64_uniq < 7.7; + +SELECT count(*) FROM tab WHERE u64 < '7'; +SELECT count(*) FROM tab WHERE u64_tdigest < '7'; +SELECT count(*) FROM tab WHERE u64_count_min < '7'; +SELECT count(*) FROM tab WHERE u64_uniq < '7'; + +SELECT count(*) FROM tab WHERE u64 < '7.7'; -- { serverError TYPE_MISMATCH } +SELECT count(*) FROM tab WHERE u64_tdigest < '7.7'; -- { serverError TYPE_MISMATCH } +SELECT count(*) FROM tab WHERE u64_count_min < '7.7'; -- { serverError TYPE_MISMATCH } +SELECT count(*) FROM tab WHERE u64_uniq < '7.7'; -- { serverError TYPE_MISMATCH } + +-- f64 ---------------------------------------------------- + +SELECT 'f64 and ='; + +SELECT count(*) FROM tab WHERE f64 = 7; +SELECT count(*) FROM tab WHERE f64_tdigest = 7; +SELECT count(*) FROM tab WHERE f64_count_min = 7; +SELECT count(*) FROM tab WHERE f64_uniq = 7; + +SELECT count(*) FROM tab WHERE f64 = 7.7; +SELECT count(*) FROM tab WHERE f64_tdigest = 7.7; +SELECT count(*) FROM tab WHERE f64_count_min = 7.7; +SELECT count(*) FROM tab WHERE f64_uniq = 7.7; + +SELECT count(*) FROM tab WHERE f64 = '7'; +SELECT count(*) FROM tab WHERE f64_tdigest = '7'; +SELECT count(*) FROM tab WHERE f64_count_min = '7'; +SELECT count(*) FROM tab WHERE f64_uniq = '7'; + +SELECT count(*) FROM tab WHERE f64 = '7.7'; +SELECT count(*) FROM tab WHERE f64_tdigest = '7.7'; +SELECT count(*) FROM tab WHERE f64_count_min = '7.7'; +SELECT count(*) FROM tab WHERE f64_uniq = '7.7'; + +SELECT 'f64 and <'; + +SELECT count(*) FROM tab WHERE f64 < 7; +SELECT count(*) FROM tab WHERE f64_tdigest < 7; +SELECT count(*) FROM tab WHERE f64_count_min < 7; +SELECT count(*) FROM tab WHERE f64_uniq < 7; + +SELECT count(*) FROM tab WHERE f64 < 7.7; +SELECT count(*) FROM tab WHERE f64_tdigest < 7.7; +SELECT count(*) FROM tab WHERE f64_count_min < 7.7; +SELECT count(*) FROM tab WHERE f64_uniq < 7.7; + +SELECT count(*) FROM tab WHERE f64 < '7'; +SELECT count(*) FROM tab WHERE f64_tdigest < '7'; +SELECT count(*) FROM tab WHERE f64_count_min < '7'; +SELECT count(*) FROM tab WHERE f64_uniq < '7'; + +SELECT count(*) FROM tab WHERE f64 < '7.7'; +SELECT count(*) FROM tab WHERE f64_tdigest < '7.7'; +SELECT count(*) FROM tab WHERE f64_count_min < '7.7'; +SELECT count(*) FROM tab WHERE f64_uniq < '7.7'; + +-- dt ---------------------------------------------------- + +SELECT 'dt and ='; + +SELECT count(*) FROM tab WHERE dt = '2024-08-08 11:12:13'; +SELECT count(*) FROM tab WHERE dt_tdigest = '2024-08-08 11:12:13'; +SELECT count(*) FROM tab WHERE dt_count_min = '2024-08-08 11:12:13'; +SELECT count(*) FROM tab WHERE dt_uniq = '2024-08-08 11:12:13'; + +SELECT count(*) FROM tab WHERE dt = 7; +SELECT count(*) FROM tab WHERE dt_tdigest = 7; +SELECT count(*) FROM tab WHERE dt_count_min = 7; +SELECT count(*) FROM tab WHERE dt_uniq = 7; + +SELECT 'dt and <'; + +SELECT count(*) FROM tab WHERE dt < '2024-08-08 11:12:13'; +SELECT count(*) FROM tab WHERE dt_tdigest < '2024-08-08 11:12:13'; +SELECT count(*) FROM tab WHERE dt_count_min < '2024-08-08 11:12:13'; +SELECT count(*) FROM tab WHERE dt_uniq < '2024-08-08 11:12:13'; + +SELECT count(*) FROM tab WHERE dt < 7; +SELECT count(*) FROM tab WHERE dt_tdigest < 7; +SELECT count(*) FROM tab WHERE dt_count_min < 7; +SELECT count(*) FROM tab WHERE dt_uniq < 7; + +-- b ---------------------------------------------------- + +SELECT 'b and ='; + +SELECT count(*) FROM tab WHERE b = true; +SELECT count(*) FROM tab WHERE b_tdigest = true; +SELECT count(*) FROM tab WHERE b_count_min = true; +SELECT count(*) FROM tab WHERE b_uniq = true; + +SELECT count(*) FROM tab WHERE b = 'true'; +SELECT count(*) FROM tab WHERE b_tdigest = 'true'; +SELECT count(*) FROM tab WHERE b_count_min = 'true'; +SELECT count(*) FROM tab WHERE b_uniq = 'true'; + +SELECT count(*) FROM tab WHERE b = 1; +SELECT count(*) FROM tab WHERE b_tdigest = 1; +SELECT count(*) FROM tab WHERE b_count_min = 1; +SELECT count(*) FROM tab WHERE b_uniq = 1; + +SELECT count(*) FROM tab WHERE b = 1.1; +SELECT count(*) FROM tab WHERE b_tdigest = 1.1; +SELECT count(*) FROM tab WHERE b_count_min = 1.1; +SELECT count(*) FROM tab WHERE b_uniq = 1.1; + +-- s ---------------------------------------------------- + +SELECT 's and ='; + +SELECT count(*) FROM tab WHERE s = 7; -- { serverError NO_COMMON_TYPE } +-- SELECT count(*) FROM tab WHERE s_tdigest = 7; -- not supported +SELECT count(*) FROM tab WHERE s_count_min = 7; -- { serverError NO_COMMON_TYPE } +-- SELECT count(*) FROM tab WHERE s_uniq = 7; -- not supported + +SELECT count(*) FROM tab WHERE s = '7'; +-- SELECT count(*) FROM tab WHERE s_tdigest = '7'; -- not supported +SELECT count(*) FROM tab WHERE s_count_min = '7'; +-- SELECT count(*) FROM tab WHERE s_uniq = '7'; -- not supported + +DROP TABLE tab; diff --git a/tests/queries/0_stateless/02864_statistics_uniq.reference b/tests/queries/0_stateless/02864_statistics_uniq.reference deleted file mode 100644 index 77786dbdd8c..00000000000 --- a/tests/queries/0_stateless/02864_statistics_uniq.reference +++ /dev/null @@ -1,35 +0,0 @@ -CREATE TABLE default.t1\n(\n `a` Float64 STATISTICS(tdigest),\n `b` Int64 STATISTICS(tdigest),\n `c` Int64 STATISTICS(tdigest, uniq),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -After insert - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), equals(c, 0), less(b, 10)) (removed) - Prewhere info - Prewhere filter - Prewhere filter column: and(equals(c, 11), less(a, 10), less(b, 10)) (removed) -After merge - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), equals(c, 0), less(b, 10)) (removed) - Prewhere info - Prewhere filter - Prewhere filter column: and(equals(c, 11), less(a, 10), less(b, 10)) (removed) -After modify TDigest - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), equals(c, 11), less(b, 10)) (removed) - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), equals(c, 0), less(b, 10)) (removed) - Prewhere info - Prewhere filter - Prewhere filter column: and(less(c, -1), less(a, 10), less(b, 10)) (removed) -After drop - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), equals(c, 11), less(b, 10)) (removed) - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), equals(c, 0), less(b, 10)) (removed) - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), less(c, -1), less(b, 10)) (removed) diff --git a/tests/queries/0_stateless/02864_statistics_uniq.sql b/tests/queries/0_stateless/02864_statistics_uniq.sql deleted file mode 100644 index 0f5f353c045..00000000000 --- a/tests/queries/0_stateless/02864_statistics_uniq.sql +++ /dev/null @@ -1,73 +0,0 @@ -DROP TABLE IF EXISTS t1; - -SET allow_experimental_statistics = 1; -SET allow_statistics_optimize = 1; -SET mutations_sync = 1; - -CREATE TABLE t1 -( - a Float64 STATISTICS(tdigest), - b Int64 STATISTICS(tdigest), - c Int64 STATISTICS(tdigest, uniq), - pk String, -) Engine = MergeTree() ORDER BY pk -SETTINGS min_bytes_for_wide_part = 0; - -SHOW CREATE TABLE t1; - -INSERT INTO t1 select number, -number, number/1000, generateUUIDv4() FROM system.numbers LIMIT 10000; -INSERT INTO t1 select 0, 0, 11, generateUUIDv4(); - -SELECT 'After insert'; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 0 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 11 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -OPTIMIZE TABLE t1 FINAL; - -SELECT 'After merge'; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 0 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 11 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; - -SELECT 'After modify TDigest'; -ALTER TABLE t1 MODIFY STATISTICS c TYPE TDigest; -ALTER TABLE t1 MATERIALIZE STATISTICS c; - -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 11 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 0 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c < -1 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; - - -ALTER TABLE t1 DROP STATISTICS c; - -SELECT 'After drop'; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 11 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 0 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c < -1 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; - -DROP TABLE IF EXISTS t1; -DROP TABLE IF EXISTS t2; -SET allow_suspicious_low_cardinality_types=1; -CREATE TABLE t2 -( - a Float64 STATISTICS(tdigest), - b Int64 STATISTICS(tdigest), - c LowCardinality(Int64) STATISTICS(tdigest, uniq), - pk String, -) Engine = MergeTree() ORDER BY pk -SETTINGS min_bytes_for_wide_part = 0; -INSERT INTO t2 select number, -number, number/1000, generateUUIDv4() FROM system.numbers LIMIT 10000; - -DROP TABLE IF EXISTS t2; -DROP TABLE IF EXISTS t3; - -CREATE TABLE t3 -( - a Float64 STATISTICS(tdigest), - b Int64 STATISTICS(tdigest), - c Nullable(Int64) STATISTICS(tdigest, uniq), - pk String, -) Engine = MergeTree() ORDER BY pk -SETTINGS min_bytes_for_wide_part = 0; -INSERT INTO t3 select number, -number, number/1000, generateUUIDv4() FROM system.numbers LIMIT 10000; - -DROP TABLE IF EXISTS t3; - diff --git a/tests/queries/0_stateless/02864_statistics_usage.reference b/tests/queries/0_stateless/02864_statistics_usage.reference new file mode 100644 index 00000000000..a9f669b88c1 --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_usage.reference @@ -0,0 +1,20 @@ +After insert + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10_UInt8), less(b, 10_UInt8)) (removed) +After drop statistic + Prewhere info + Prewhere filter + Prewhere filter column: and(less(b, 10_UInt8), less(a, 10_UInt8)) (removed) +After add and materialize statistic + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10_UInt8), less(b, 10_UInt8)) (removed) +After merge + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10_UInt8), less(b, 10_UInt8)) (removed) +After rename + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10_UInt8), less(c, 10_UInt8)) (removed) diff --git a/tests/queries/0_stateless/02864_statistics_usage.sql b/tests/queries/0_stateless/02864_statistics_usage.sql new file mode 100644 index 00000000000..4956bd27e87 --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_usage.sql @@ -0,0 +1,42 @@ +-- Test that the optimizer picks up column statistics +-- (The concrete statistics type, column data type and predicate type don't matter) + +-- Checks by the predicate evaluation order in EXPLAIN. This is quite fragile, a better approach would be helpful (maybe 'send_logs_level'?) + +SET allow_experimental_statistics = 1; +SET allow_statistics_optimize = 1; +SET mutations_sync = 1; +SET enable_analyzer = 1; + +DROP TABLE IF EXISTS tab; + +CREATE TABLE tab +( + a Float64 STATISTICS(tdigest), + b Int64 STATISTICS(tdigest) +) Engine = MergeTree() ORDER BY tuple() +SETTINGS min_bytes_for_wide_part = 0; + +INSERT INTO tab select number, -number FROM system.numbers LIMIT 10000; +SELECT 'After insert'; +SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks a first, then b (statistics used) + +ALTER TABLE tab DROP STATISTICS a, b; +SELECT 'After drop statistic'; +SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks b first, then a (statistics not used) + +ALTER TABLE tab ADD STATISTICS a, b TYPE tdigest; +ALTER TABLE tab MATERIALIZE STATISTICS a, b; +INSERT INTO tab select number, -number FROM system.numbers LIMIT 10000; +SELECT 'After add and materialize statistic'; +SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks a first, then b (statistics used) + +OPTIMIZE TABLE tab FINAL; +SELECT 'After merge'; +SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks a first, then b (statistics used) + +ALTER TABLE tab RENAME COLUMN b TO c; +SELECT 'After rename'; +SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks a first, then c (statistics used) + +DROP TABLE IF EXISTS tab; diff --git a/tests/queries/0_stateless/02868_no_merge_across_partitions_final_with_lonely.sh b/tests/queries/0_stateless/02868_no_merge_across_partitions_final_with_lonely.sh index 4bc29ce4233..be0ef4e2648 100755 --- a/tests/queries/0_stateless/02868_no_merge_across_partitions_final_with_lonely.sh +++ b/tests/queries/0_stateless/02868_no_merge_across_partitions_final_with_lonely.sh @@ -7,7 +7,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -nm -q """ +${CLICKHOUSE_CLIENT} -m -q """ DROP TABLE IF EXISTS with_lonely; CREATE TABLE with_lonely @@ -23,7 +23,7 @@ ORDER BY (id); """ create_optimize_partition() { - ${CLICKHOUSE_CLIENT} -nm -q """ + ${CLICKHOUSE_CLIENT} -m -q """ INSERT INTO with_lonely SELECT number, '$1', number*10, 0 FROM numbers(10); INSERT INTO with_lonely SELECT number+500000, '$1', number*10, 1 FROM numbers(10); """ @@ -39,7 +39,7 @@ create_optimize_partition "2022-10-29" create_optimize_partition "2022-10-30" create_optimize_partition "2022-10-31" -${CLICKHOUSE_CLIENT} -nm -q """ +${CLICKHOUSE_CLIENT} -m -q """ SYSTEM STOP MERGES with_lonely; INSERT INTO with_lonely SELECT number, '2022-11-01', number*10, 0 FROM numbers(10); diff --git a/tests/queries/0_stateless/02870_per_column_settings.sql b/tests/queries/0_stateless/02870_per_column_settings.sql index d242ebe6c61..c3050222bc8 100644 --- a/tests/queries/0_stateless/02870_per_column_settings.sql +++ b/tests/queries/0_stateless/02870_per_column_settings.sql @@ -49,7 +49,7 @@ CREATE TABLE tab ( id UInt64, tup Tuple(UInt64, UInt64) SETTINGS (min_compress_block_size = 81920, max_compress_block_size = 163840), - json JSON SETTINGS (min_compress_block_size = 81920, max_compress_block_size = 163840), + json Object('json') SETTINGS (min_compress_block_size = 81920, max_compress_block_size = 163840), ) ENGINE = MergeTree ORDER BY id diff --git a/tests/queries/0_stateless/02871_clickhouse_client_restart_pager.sh b/tests/queries/0_stateless/02871_clickhouse_client_restart_pager.sh index cc4ce9b122e..418e439e44b 100755 --- a/tests/queries/0_stateless/02871_clickhouse_client_restart_pager.sh +++ b/tests/queries/0_stateless/02871_clickhouse_client_restart_pager.sh @@ -7,7 +7,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # head by default print 10 rows, but it is not enough to query 11 rows, since # we need to overflow the default pipe size, hence just 1 million of rows (it # should be around 6 MiB in text representation, should be definitelly enough). -$CLICKHOUSE_CLIENT --ignore-error -nm --pager head -q " +$CLICKHOUSE_CLIENT --ignore-error -m --pager head -q " select * from numbers(1e6); -- { clientError CANNOT_WRITE_TO_FILE_DESCRIPTOR } select * from numbers(1e6); -- { clientError CANNOT_WRITE_TO_FILE_DESCRIPTOR } " diff --git a/tests/queries/0_stateless/02871_peak_threads_usage.sh b/tests/queries/0_stateless/02871_peak_threads_usage.sh index dfb3e665020..0f0473bbb47 100755 --- a/tests/queries/0_stateless/02871_peak_threads_usage.sh +++ b/tests/queries/0_stateless/02871_peak_threads_usage.sh @@ -26,7 +26,7 @@ ${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_6" --query='SELECT * FROM nu ${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_7" --query='SELECT * FROM numbers_mt(5000), numbers(5000) SETTINGS max_threads = 1, joined_subquery_requires_alias=0' "${QUERY_OPTIONS[@]}" ${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_8" --query='SELECT * FROM numbers_mt(5000), numbers(5000) SETTINGS max_threads = 4, joined_subquery_requires_alias=0' "${QUERY_OPTIONS[@]}" -${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_9" -mn --query=""" +${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_9" -m --query=""" SELECT count() FROM (SELECT number FROM numbers_mt(1,100000) UNION ALL SELECT number FROM numbers_mt(10000, 200000) @@ -38,7 +38,7 @@ SELECT count() FROM UNION ALL SELECT number FROM numbers_mt(300000, 4000000) ) SETTINGS max_threads = 1""" "${QUERY_OPTIONS[@]}" -${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_10" -mn --query=""" +${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_10" -m --query=""" SELECT count() FROM (SELECT number FROM numbers_mt(1,100000) UNION ALL SELECT number FROM numbers_mt(10000, 2000) @@ -50,7 +50,7 @@ SELECT count() FROM UNION ALL SELECT number FROM numbers_mt(300000, 4000000) ) SETTINGS max_threads = 4""" "${QUERY_OPTIONS[@]}" -${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_11" -mn --query=""" +${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_11" -m --query=""" SELECT count() FROM (SELECT number FROM numbers_mt(1,100000) UNION ALL SELECT number FROM numbers_mt(1, 1) @@ -62,20 +62,20 @@ SELECT count() FROM UNION ALL SELECT number FROM numbers_mt(1, 4000000) ) SETTINGS max_threads = 4""" "${QUERY_OPTIONS[@]}" -${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_12" -mn --query=""" +${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_12" -m --query=""" SELECT sum(number) FROM numbers_mt(100000) GROUP BY number % 2 WITH TOTALS ORDER BY number % 2 SETTINGS max_threads = 4""" "${QUERY_OPTIONS[@]}" -${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_13" -mn --query="SELECT * FROM numbers(100000) SETTINGS max_threads = 1" "${QUERY_OPTIONS[@]}" +${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_13" -m --query="SELECT * FROM numbers(100000) SETTINGS max_threads = 1" "${QUERY_OPTIONS[@]}" -${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_14" -mn --query="SELECT * FROM numbers(100000) SETTINGS max_threads = 4" "${QUERY_OPTIONS[@]}" +${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_14" -m --query="SELECT * FROM numbers(100000) SETTINGS max_threads = 4" "${QUERY_OPTIONS[@]}" ${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS" for i in {1..14} do - ${CLICKHOUSE_CLIENT} -mn --query=""" + ${CLICKHOUSE_CLIENT} -m --query=""" SELECT '${i}', peak_threads_usage, (select count() from system.query_thread_log WHERE system.query_thread_log.query_id = '${UNIQUE_QUERY_ID}_${i}' AND current_database = currentDatabase()) = length(thread_ids), diff --git a/tests/queries/0_stateless/02875_clickhouse_local_multiquery.sh b/tests/queries/0_stateless/02875_clickhouse_local_multiquery.sh index 3f2b732e71b..3a7d861262e 100755 --- a/tests/queries/0_stateless/02875_clickhouse_local_multiquery.sh +++ b/tests/queries/0_stateless/02875_clickhouse_local_multiquery.sh @@ -9,7 +9,7 @@ $CLICKHOUSE_CLIENT -q "select 1; select 2;" $CLICKHOUSE_LOCAL -q "select 1; select 2;" # -n is a no-op -$CLICKHOUSE_CLIENT -n -q "select 1; select 2;" -$CLICKHOUSE_LOCAL -n -q "select 1; select 2;" +$CLICKHOUSE_CLIENT -q "select 1; select 2;" +$CLICKHOUSE_LOCAL -q "select 1; select 2;" exit 0 diff --git a/tests/queries/0_stateless/02875_merge_engine_set_index.sh b/tests/queries/0_stateless/02875_merge_engine_set_index.sh index 355d83167a6..f40696c31a9 100755 --- a/tests/queries/0_stateless/02875_merge_engine_set_index.sh +++ b/tests/queries/0_stateless/02875_merge_engine_set_index.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -nq " +$CLICKHOUSE_CLIENT -q " CREATE TABLE t1 ( a UInt32, @@ -57,7 +57,7 @@ ORDER BY b DESC FORMAT Null;" -$CLICKHOUSE_CLIENT -nq " +$CLICKHOUSE_CLIENT -q " SYSTEM FLUSH LOGS; SELECT ProfileEvents['SelectedMarks'] diff --git a/tests/queries/0_stateless/02878_use_structure_from_insertion_table_with_explicit_insert_columns.sh b/tests/queries/0_stateless/02878_use_structure_from_insertion_table_with_explicit_insert_columns.sh index 8bdaa47c111..dd08724456b 100755 --- a/tests/queries/0_stateless/02878_use_structure_from_insertion_table_with_explicit_insert_columns.sh +++ b/tests/queries/0_stateless/02878_use_structure_from_insertion_table_with_explicit_insert_columns.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh $CLICKHOUSE_LOCAL -q "select 42 as x format Native" > $CLICKHOUSE_TEST_UNIQUE_NAME.native -$CLICKHOUSE_LOCAL -n -q " +$CLICKHOUSE_LOCAL -q " create table test (x UInt64, y UInt64) engine=Memory; insert into test (x) select * from file('$CLICKHOUSE_TEST_UNIQUE_NAME.native'); insert into test (y) select * from file('$CLICKHOUSE_TEST_UNIQUE_NAME.native'); diff --git a/tests/queries/0_stateless/02879_use_structure_from_insertion_table_with_defaults.sh b/tests/queries/0_stateless/02879_use_structure_from_insertion_table_with_defaults.sh index 315bbcd544f..c7270b65e19 100755 --- a/tests/queries/0_stateless/02879_use_structure_from_insertion_table_with_defaults.sh +++ b/tests/queries/0_stateless/02879_use_structure_from_insertion_table_with_defaults.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh $CLICKHOUSE_LOCAL -q "select 1 as x format Native" > $CLICKHOUSE_TEST_UNIQUE_NAME.native -$CLICKHOUSE_LOCAL -n -q " +$CLICKHOUSE_LOCAL -q " create table test (x UInt64, y UInt64 default 42) engine=Memory; insert into test select * from file('$CLICKHOUSE_TEST_UNIQUE_NAME.native'); select * from test; diff --git a/tests/queries/0_stateless/02883_named_collections_override.sh b/tests/queries/0_stateless/02883_named_collections_override.sh index a08c795127d..915ce280226 100755 --- a/tests/queries/0_stateless/02883_named_collections_override.sh +++ b/tests/queries/0_stateless/02883_named_collections_override.sh @@ -8,7 +8,7 @@ u1="${CLICKHOUSE_TEST_UNIQUE_NAME}_collection1" u2="${CLICKHOUSE_TEST_UNIQUE_NAME}_collection2" u3="${CLICKHOUSE_TEST_UNIQUE_NAME}_collection3" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " DROP NAMED COLLECTION IF EXISTS $u1; DROP NAMED COLLECTION IF EXISTS $u2; diff --git a/tests/queries/0_stateless/02884_async_insert_native_protocol_1.sh b/tests/queries/0_stateless/02884_async_insert_native_protocol_1.sh index 7f583087336..791515c82d6 100755 --- a/tests/queries/0_stateless/02884_async_insert_native_protocol_1.sh +++ b/tests/queries/0_stateless/02884_async_insert_native_protocol_1.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) set -e -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " DROP TABLE IF EXISTS t_async_insert_native_1; CREATE TABLE t_async_insert_native_1 (id UInt64, s String) ENGINE = MergeTree ORDER BY id; " @@ -22,7 +22,7 @@ echo '{"id": 1, "s": "aaa"}' \ | $CLICKHOUSE_CLIENT $async_insert_options -q 'INSERT INTO t_async_insert_native_1 FORMAT JSONEachRow {"id": 2, "s": "bbb"}' 2>&1 \ | grep -o "NOT_IMPLEMENTED" -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " SELECT sum(length(entries.bytes)) FROM system.asynchronous_inserts WHERE database = '$CLICKHOUSE_DATABASE' AND table = 't_async_insert_native_1'; diff --git a/tests/queries/0_stateless/02884_async_insert_native_protocol_2.sh b/tests/queries/0_stateless/02884_async_insert_native_protocol_2.sh index b9b1854eaef..a8a9209ee68 100755 --- a/tests/queries/0_stateless/02884_async_insert_native_protocol_2.sh +++ b/tests/queries/0_stateless/02884_async_insert_native_protocol_2.sh @@ -6,7 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) set -e -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " DROP TABLE IF EXISTS t_async_insert_native_2; CREATE TABLE t_async_insert_native_2 (id UInt64, s String) ENGINE = MergeTree ORDER BY id; " @@ -18,7 +18,7 @@ echo "(3, 'ccc') (4, 'ddd') (5, 'eee')" | $CLICKHOUSE_CLIENT $async_insert_optio wait -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " SELECT * FROM t_async_insert_native_2 ORDER BY id; SYSTEM FLUSH LOGS; diff --git a/tests/queries/0_stateless/02884_async_insert_native_protocol_3.sh b/tests/queries/0_stateless/02884_async_insert_native_protocol_3.sh index c9d399607d0..229f13eb821 100755 --- a/tests/queries/0_stateless/02884_async_insert_native_protocol_3.sh +++ b/tests/queries/0_stateless/02884_async_insert_native_protocol_3.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) set -e -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " DROP TABLE IF EXISTS t_async_insert_native_3; CREATE TABLE t_async_insert_native_3 (id UInt64, s String) ENGINE = MergeTree ORDER BY id; " @@ -21,7 +21,7 @@ $CLICKHOUSE_CLIENT $async_insert_options -q "INSERT INTO t_async_insert_native_3 wait -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " SELECT format, length(entries.bytes) FROM system.asynchronous_inserts WHERE database = '$CLICKHOUSE_DATABASE' AND table = 't_async_insert_native_3' ORDER BY format; diff --git a/tests/queries/0_stateless/02884_async_insert_native_protocol_4.sh b/tests/queries/0_stateless/02884_async_insert_native_protocol_4.sh index 9118c11315c..e84c1ca8899 100755 --- a/tests/queries/0_stateless/02884_async_insert_native_protocol_4.sh +++ b/tests/queries/0_stateless/02884_async_insert_native_protocol_4.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " DROP TABLE IF EXISTS t_async_insert_native_4; CREATE TABLE t_async_insert_native_4 (id UInt64) ENGINE = MergeTree ORDER BY id; " @@ -20,7 +20,7 @@ echo "(2) (3) (4) (5)" | $CLICKHOUSE_CLIENT_WITH_LOG $async_insert_options --asy -q 'INSERT INTO t_async_insert_native_4 FORMAT Values' 2>&1 \ | grep -c "too much data" -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " SELECT * FROM t_async_insert_native_4 ORDER BY id; SYSTEM FLUSH LOGS; diff --git a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference index a03343c8cb3..39e7aad87e0 100644 --- a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference +++ b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference @@ -27,6 +27,7 @@ OK OK 100 100 +OK ===== TestGrants ===== OK OK diff --git a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh index cc4e76a9ed9..fadbbff7f34 100755 --- a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh +++ b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh @@ -199,6 +199,8 @@ ${CLICKHOUSE_CLIENT} --user $user2 --query "INSERT INTO source SELECT * FROM gen ${CLICKHOUSE_CLIENT} --query "SELECT count() FROM destination1" ${CLICKHOUSE_CLIENT} --query "SELECT count() FROM destination2" +(( $(${CLICKHOUSE_CLIENT} --query "ALTER TABLE test_table MODIFY SQL SECURITY INVOKER" 2>&1 | grep -c "is not supported") >= 1 )) && echo "OK" || echo "UNEXPECTED" + echo "===== TestGrants =====" ${CLICKHOUSE_CLIENT} --query "GRANT CREATE ON *.* TO $user1" ${CLICKHOUSE_CLIENT} --query "GRANT SELECT ON $db.test_table TO $user1, $user2" diff --git a/tests/queries/0_stateless/02884_parallel_window_functions.reference b/tests/queries/0_stateless/02884_parallel_window_functions.reference index bac15838dc2..1f5346a1484 100644 --- a/tests/queries/0_stateless/02884_parallel_window_functions.reference +++ b/tests/queries/0_stateless/02884_parallel_window_functions.reference @@ -12,7 +12,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading GROUP BY ac, nw ) GROUP BY nw @@ -32,7 +32,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading GROUP BY ac, nw ) GROUP BY nw @@ -42,6 +42,7 @@ SETTINGS max_threads = 1; 0 2 0 1 2 0 2 2 0 +SET max_rows_to_read = 40000000; SELECT nw, sum(WR) AS R, @@ -53,7 +54,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading WHERE (ac % 4) = 0 GROUP BY ac, @@ -64,7 +65,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading WHERE (ac % 4) = 1 GROUP BY ac, @@ -75,7 +76,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading WHERE (ac % 4) = 2 GROUP BY ac, @@ -86,7 +87,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading WHERE (ac % 4) = 3 GROUP BY ac, diff --git a/tests/queries/0_stateless/02884_parallel_window_functions.sql b/tests/queries/0_stateless/02884_parallel_window_functions.sql index c5ab013a198..2207c90a4ee 100644 --- a/tests/queries/0_stateless/02884_parallel_window_functions.sql +++ b/tests/queries/0_stateless/02884_parallel_window_functions.sql @@ -1,6 +1,6 @@ -- Tags: long, no-tsan, no-asan, no-ubsan, no-msan, no-debug -CREATE TABLE window_funtion_threading +CREATE TABLE window_function_threading Engine = MergeTree ORDER BY (ac, nw) AS SELECT @@ -20,7 +20,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading GROUP BY ac, nw ) GROUP BY nw @@ -40,7 +40,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading GROUP BY ac, nw ) GROUP BY nw @@ -58,7 +58,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading GROUP BY ac, nw ) GROUP BY nw @@ -66,6 +66,8 @@ ORDER BY nw ASC, R DESC LIMIT 10 SETTINGS max_threads = 1; +SET max_rows_to_read = 40000000; + SELECT nw, sum(WR) AS R, @@ -77,7 +79,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading WHERE (ac % 4) = 0 GROUP BY ac, @@ -88,7 +90,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading WHERE (ac % 4) = 1 GROUP BY ac, @@ -99,7 +101,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading WHERE (ac % 4) = 2 GROUP BY ac, @@ -110,7 +112,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading WHERE (ac % 4) = 3 GROUP BY ac, diff --git a/tests/queries/0_stateless/02885_ephemeral_columns_from_file.sh b/tests/queries/0_stateless/02885_ephemeral_columns_from_file.sh index 2917ec86957..065658d4d56 100755 --- a/tests/queries/0_stateless/02885_ephemeral_columns_from_file.sh +++ b/tests/queries/0_stateless/02885_ephemeral_columns_from_file.sh @@ -9,7 +9,7 @@ $CLICKHOUSE_LOCAL -q "select * from file('$CLICKHOUSE_TEST_UNIQUE_NAME.jsonl', a $CLICKHOUSE_LOCAL -q "select * from file('$CLICKHOUSE_TEST_UNIQUE_NAME.jsonl', auto, 'x UInt64 Alias y, y UInt64')" 2>&1 | grep -c "BAD_ARGUMENTS" $CLICKHOUSE_LOCAL -q "select * from file('$CLICKHOUSE_TEST_UNIQUE_NAME.jsonl', auto, 'x UInt64 Materialized 42, y UInt64')" 2>&1 | grep -c "BAD_ARGUMENTS" -$CLICKHOUSE_LOCAL -n -q " +$CLICKHOUSE_LOCAL -q " create table test (x UInt64 Ephemeral, y UInt64 default x + 1) engine=Memory; insert into test (x, y) select * from file('$CLICKHOUSE_TEST_UNIQUE_NAME.jsonl'); select * from test; diff --git a/tests/queries/0_stateless/02890_describe_table_options.reference b/tests/queries/0_stateless/02890_describe_table_options.reference index 9181cb27cb0..b77ef4a0fdf 100644 --- a/tests/queries/0_stateless/02890_describe_table_options.reference +++ b/tests/queries/0_stateless/02890_describe_table_options.reference @@ -54,6 +54,8 @@ _row_exists UInt8 Persisted mask created by lightweight delete that show wheth _block_number UInt64 Persisted original number of block that was assigned at insert Delta, LZ4 1 _block_offset UInt64 Persisted original number of row in block that was assigned at insert Delta, LZ4 1 _shard_num UInt32 Deprecated. Use function shardNum instead 1 +_database LowCardinality(String) The name of database which the row comes from 1 +_table LowCardinality(String) The name of table which the row comes from 1 SET describe_compact_output = 0, describe_include_virtual_columns = 1, describe_include_subcolumns = 1; DESCRIBE TABLE t_describe_options; id UInt64 index column 0 0 @@ -87,6 +89,8 @@ _row_exists UInt8 Persisted mask created by lightweight delete that show wheth _block_number UInt64 Persisted original number of block that was assigned at insert Delta, LZ4 0 1 _block_offset UInt64 Persisted original number of row in block that was assigned at insert Delta, LZ4 0 1 _shard_num UInt32 Deprecated. Use function shardNum instead 0 1 +_database LowCardinality(String) The name of database which the row comes from 0 1 +_table LowCardinality(String) The name of table which the row comes from 0 1 arr.size0 UInt64 1 0 t.a String ZSTD(1) 1 0 t.b UInt64 ZSTD(1) 1 0 @@ -144,6 +148,8 @@ _row_exists UInt8 1 _block_number UInt64 1 _block_offset UInt64 1 _shard_num UInt32 1 +_database LowCardinality(String) 1 +_table LowCardinality(String) 1 SET describe_compact_output = 1, describe_include_virtual_columns = 1, describe_include_subcolumns = 1; DESCRIBE TABLE t_describe_options; id UInt64 0 0 @@ -177,6 +183,8 @@ _row_exists UInt8 0 1 _block_number UInt64 0 1 _block_offset UInt64 0 1 _shard_num UInt32 0 1 +_database LowCardinality(String) 0 1 +_table LowCardinality(String) 0 1 arr.size0 UInt64 1 0 t.a String 1 0 t.b UInt64 1 0 diff --git a/tests/queries/0_stateless/02895_npy_output_format.sh b/tests/queries/0_stateless/02895_npy_output_format.sh index a364e447062..74000bc298f 100755 --- a/tests/queries/0_stateless/02895_npy_output_format.sh +++ b/tests/queries/0_stateless/02895_npy_output_format.sh @@ -9,7 +9,7 @@ mkdir -p ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* chmod 777 ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ -${CLICKHOUSE_CLIENT} -n -q --ignore-error " +${CLICKHOUSE_CLIENT} -q --ignore-error " DROP DATABASE IF EXISTS npy_output_02895; CREATE DATABASE IF NOT EXISTS npy_output_02895; diff --git a/tests/queries/0_stateless/02895_peak_memory_usage_http_headers_regression.sh b/tests/queries/0_stateless/02895_peak_memory_usage_http_headers_regression.sh index d6775927f35..b4656c9e321 100755 --- a/tests/queries/0_stateless/02895_peak_memory_usage_http_headers_regression.sh +++ b/tests/queries/0_stateless/02895_peak_memory_usage_http_headers_regression.sh @@ -4,7 +4,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " DROP TABLE IF EXISTS data; DROP TABLE IF EXISTS data2; DROP VIEW IF EXISTS mv1; diff --git a/tests/queries/0_stateless/02896_max_execution_time_with_break_overflow_mode.sql b/tests/queries/0_stateless/02896_max_execution_time_with_break_overflow_mode.sql index ec86a66c7dd..ecaad62b35a 100644 --- a/tests/queries/0_stateless/02896_max_execution_time_with_break_overflow_mode.sql +++ b/tests/queries/0_stateless/02896_max_execution_time_with_break_overflow_mode.sql @@ -1,5 +1,7 @@ -- Tags: no-fasttest +SET max_rows_to_read = 0, max_execution_time = 0, max_estimated_execution_time = 0; + -- Query stops after timeout without an error SELECT * FROM numbers(100000000) SETTINGS max_block_size=1, max_execution_time=2, timeout_overflow_mode='break' FORMAT Null; diff --git a/tests/queries/0_stateless/02903_empty_order_by_throws_error.sh b/tests/queries/0_stateless/02903_empty_order_by_throws_error.sh index 64f5dd1a987..ef631d9ed1b 100755 --- a/tests/queries/0_stateless/02903_empty_order_by_throws_error.sh +++ b/tests/queries/0_stateless/02903_empty_order_by_throws_error.sh @@ -5,13 +5,13 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh # setting disabled and no order by or primary key; expect error -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" DROP TABLE IF EXISTS test_empty_order_by; CREATE TABLE test_empty_order_by(a UInt8) ENGINE = MergeTree() SETTINGS index_granularity = 8192; " 2>&1 \ | grep -F -q "You must provide an ORDER BY or PRIMARY KEY expression in the table definition." && echo 'OK' || echo 'FAIL' # setting disabled and primary key in table definition -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" DROP TABLE IF EXISTS test_empty_order_by; CREATE TABLE test_empty_order_by(a UInt8) ENGINE = MergeTree() PRIMARY KEY a SETTINGS index_granularity = 8192; SHOW CREATE TABLE test_empty_order_by; diff --git a/tests/queries/0_stateless/02903_rmt_retriable_merge_exception.sh b/tests/queries/0_stateless/02903_rmt_retriable_merge_exception.sh index 5065da371a8..b77e5b0b402 100755 --- a/tests/queries/0_stateless/02903_rmt_retriable_merge_exception.sh +++ b/tests/queries/0_stateless/02903_rmt_retriable_merge_exception.sh @@ -15,7 +15,7 @@ if [[ $($CLICKHOUSE_CLIENT -q "select count()>0 from system.clusters where clust cluster=test_cluster_database_replicated fi -$CLICKHOUSE_CLIENT -nm --distributed_ddl_output_mode=none -q " +$CLICKHOUSE_CLIENT -m --distributed_ddl_output_mode=none -q " drop table if exists rmt1; drop table if exists rmt2; @@ -46,7 +46,7 @@ part_name='%' # wait while there be at least one 'No active replica has part all_0_1_1 or covering part' in logs for _ in {0..50}; do - no_active_repilica_messages=$($CLICKHOUSE_CLIENT -nm -q " + no_active_repilica_messages=$($CLICKHOUSE_CLIENT -m -q " system flush logs; select count() @@ -65,7 +65,7 @@ for _ in {0..50}; do sleep 1 done -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " system start pulling replication log rmt2; system flush logs; diff --git a/tests/queries/0_stateless/02904_empty_order_by_with_setting_enabled.sh b/tests/queries/0_stateless/02904_empty_order_by_with_setting_enabled.sh index 7ac9b488be5..5f9dc6ea077 100755 --- a/tests/queries/0_stateless/02904_empty_order_by_with_setting_enabled.sh +++ b/tests/queries/0_stateless/02904_empty_order_by_with_setting_enabled.sh @@ -5,7 +5,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh # setting enabled and no order by or primary key -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET create_table_empty_primary_key_by_default = true; DROP TABLE IF EXISTS test_empty_order_by; CREATE TABLE test_empty_order_by(a UInt8) ENGINE = MergeTree() SETTINGS index_granularity = 8192; @@ -13,7 +13,7 @@ ${CLICKHOUSE_CLIENT} -n --query=" " 2>&1 \ | grep -F -q "ORDER BY tuple()" && echo 'OK' || echo 'FAIL' # setting enabled and per-column primary key -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET create_table_empty_primary_key_by_default = true; DROP TABLE IF EXISTS test_empty_order_by; CREATE TABLE test_empty_order_by(a UInt8 PRIMARY KEY, b String PRIMARY KEY) ENGINE = MergeTree() SETTINGS index_granularity = 8192; @@ -21,7 +21,7 @@ ${CLICKHOUSE_CLIENT} -n --query=" " 2>&1 \ | grep -F -q "ORDER BY (a, b)" && echo 'OK' || echo 'FAIL' # setting enabled and primary key in table definition (not per-column or order by) -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET create_table_empty_primary_key_by_default = true; DROP TABLE IF EXISTS test_empty_order_by; CREATE TABLE test_empty_order_by(a UInt8, b String) ENGINE = MergeTree() PRIMARY KEY (a) SETTINGS index_granularity = 8192; @@ -29,7 +29,7 @@ ${CLICKHOUSE_CLIENT} -n --query=" " 2>&1 \ | grep -F -q "ORDER BY a" && echo 'OK' || echo 'FAIL' # setting enabled and order by in table definition (no primary key) -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET create_table_empty_primary_key_by_default = true; DROP TABLE IF EXISTS test_empty_order_by; CREATE TABLE test_empty_order_by(a UInt8, b String) ENGINE = MergeTree() ORDER BY (a, b) SETTINGS index_granularity = 8192; diff --git a/tests/queries/0_stateless/02907_backup_mv_with_no_inner_table.sh b/tests/queries/0_stateless/02907_backup_mv_with_no_inner_table.sh index 30ec50fa20f..e37f1e51c74 100755 --- a/tests/queries/0_stateless/02907_backup_mv_with_no_inner_table.sh +++ b/tests/queries/0_stateless/02907_backup_mv_with_no_inner_table.sh @@ -5,7 +5,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table if exists src; create table src (a Int32) engine = MergeTree() order by tuple(); @@ -15,14 +15,14 @@ create materialized view mv (a Int32) engine = MergeTree() order by tuple() as s uuid=$(${CLICKHOUSE_CLIENT} --query "select uuid from system.tables where table='mv' and database == currentDatabase()") inner_table=".inner_id.${uuid}" -${CLICKHOUSE_CLIENT} -nm --query "drop table \`$inner_table\` sync" +${CLICKHOUSE_CLIENT} -m --query "drop table \`$inner_table\` sync" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " set send_logs_level = 'error'; backup table ${CLICKHOUSE_DATABASE}.\`mv\` to Disk('backups', '${CLICKHOUSE_TEST_UNIQUE_NAME}'); " | grep -o "BACKUP_CREATED" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table mv; restore table ${CLICKHOUSE_DATABASE}.\`mv\` from Disk('backups', '${CLICKHOUSE_TEST_UNIQUE_NAME}'); " | grep -o "RESTORED" diff --git a/tests/queries/0_stateless/02907_backup_mv_with_no_source_table.sh b/tests/queries/0_stateless/02907_backup_mv_with_no_source_table.sh index d59ebe400ee..f950954941f 100755 --- a/tests/queries/0_stateless/02907_backup_mv_with_no_source_table.sh +++ b/tests/queries/0_stateless/02907_backup_mv_with_no_source_table.sh @@ -4,7 +4,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table if exists src; create table src (a Int32) engine = MergeTree() order by tuple(); @@ -15,18 +15,18 @@ drop table if exists mv; create materialized view mv to dst (a Int32) as select * from src; " -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table src; backup database ${CLICKHOUSE_DATABASE} on cluster test_shard_localhost to Disk('backups', '${CLICKHOUSE_TEST_UNIQUE_NAME}'); " | grep -o "BACKUP_CREATED" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table mv; set allow_deprecated_database_ordinary=1; restore table ${CLICKHOUSE_DATABASE}.mv on cluster test_shard_localhost from Disk('backups', '${CLICKHOUSE_TEST_UNIQUE_NAME}'); " | grep -o "RESTORED" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table if exists src; create table src (a Int32) engine = MergeTree() order by tuple(); @@ -37,13 +37,13 @@ drop table if exists mv; create materialized view mv to dst (a Int32) as select * from src; " -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table src; drop table dst; backup database ${CLICKHOUSE_DATABASE} on cluster test_shard_localhost to Disk('backups', '${CLICKHOUSE_TEST_UNIQUE_NAME}2'); " | grep -o "BACKUP_CREATED" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table mv; set allow_deprecated_database_ordinary=1; restore table ${CLICKHOUSE_DATABASE}.mv on cluster test_shard_localhost from Disk('backups', '${CLICKHOUSE_TEST_UNIQUE_NAME}2'); diff --git a/tests/queries/0_stateless/02907_backup_restore_default_nullable.sh b/tests/queries/0_stateless/02907_backup_restore_default_nullable.sh index 8ed36a7edd7..dc5793d1638 100755 --- a/tests/queries/0_stateless/02907_backup_restore_default_nullable.sh +++ b/tests/queries/0_stateless/02907_backup_restore_default_nullable.sh @@ -4,7 +4,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table if exists test; set data_type_default_nullable = 0; create table test (test String) ENGINE = MergeTree() ORDER BY tuple(); @@ -13,7 +13,7 @@ backup table ${CLICKHOUSE_DATABASE}.test on cluster test_shard_localhost to Disk ${CLICKHOUSE_CLIENT} --query "show create table test" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table test sync; set data_type_default_nullable = 1; restore table ${CLICKHOUSE_DATABASE}.test on cluster test_shard_localhost from Disk('backups', '${CLICKHOUSE_TEST_UNIQUE_NAME}'); diff --git a/tests/queries/0_stateless/02907_backup_restore_flatten_nested.reference b/tests/queries/0_stateless/02907_backup_restore_flatten_nested.reference index aa8f22f590a..0db19f0591a 100644 --- a/tests/queries/0_stateless/02907_backup_restore_flatten_nested.reference +++ b/tests/queries/0_stateless/02907_backup_restore_flatten_nested.reference @@ -1,8 +1,8 @@ BACKUP_CREATED -CREATE TABLE default.test\n(\n `test` Array(Tuple(foo String, bar Float64))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.test\n(\n `test` Array(Tuple(\n foo String,\n bar Float64))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 BACKUP_CREATED CREATE TABLE default.test2\n(\n `test` Nested(foo String, bar Float64)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 RESTORED -CREATE TABLE default.test\n(\n `test` Array(Tuple(foo String, bar Float64))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.test\n(\n `test` Array(Tuple(\n foo String,\n bar Float64))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 RESTORED CREATE TABLE default.test2\n(\n `test` Nested(foo String, bar Float64)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/02907_backup_restore_flatten_nested.sh b/tests/queries/0_stateless/02907_backup_restore_flatten_nested.sh index 742d24a97eb..eae307add10 100755 --- a/tests/queries/0_stateless/02907_backup_restore_flatten_nested.sh +++ b/tests/queries/0_stateless/02907_backup_restore_flatten_nested.sh @@ -4,7 +4,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table if exists test; set flatten_nested = 0; create table test (test Array(Tuple(foo String, bar Float64))) ENGINE = MergeTree() ORDER BY tuple(); @@ -13,7 +13,7 @@ backup table ${CLICKHOUSE_DATABASE}.test on cluster test_shard_localhost to Disk ${CLICKHOUSE_CLIENT} --query "show create table test" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table if exists test2; set flatten_nested = 0; create table test2 (test Nested(foo String, bar Float64)) ENGINE = MergeTree() ORDER BY tuple(); @@ -22,7 +22,7 @@ backup table ${CLICKHOUSE_DATABASE}.test2 on cluster test_shard_localhost to Dis ${CLICKHOUSE_CLIENT} --query "show create table test2" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table test sync; set flatten_nested = 1; restore table ${CLICKHOUSE_DATABASE}.test on cluster test_shard_localhost from Disk('backups', '${CLICKHOUSE_TEST_UNIQUE_NAME}'); @@ -30,7 +30,7 @@ restore table ${CLICKHOUSE_DATABASE}.test on cluster test_shard_localhost from D ${CLICKHOUSE_CLIENT} --query "show create table test" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table test2 sync; set flatten_nested = 1; restore table ${CLICKHOUSE_DATABASE}.test2 on cluster test_shard_localhost from Disk('backups', '${CLICKHOUSE_TEST_UNIQUE_NAME}2'); diff --git a/tests/queries/0_stateless/02907_clickhouse_dictionary_bug.sh b/tests/queries/0_stateless/02907_clickhouse_dictionary_bug.sh index 57182050534..2cad15c6fcb 100755 --- a/tests/queries/0_stateless/02907_clickhouse_dictionary_bug.sh +++ b/tests/queries/0_stateless/02907_clickhouse_dictionary_bug.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -n -q " +${CLICKHOUSE_CLIENT} -q " DROP DICTIONARY IF EXISTS 02907_dictionary; DROP TABLE IF EXISTS 02907_table; diff --git a/tests/queries/0_stateless/02907_system_backups_profile_events.sh b/tests/queries/0_stateless/02907_system_backups_profile_events.sh index 801056a2844..9a1d5a3db11 100755 --- a/tests/queries/0_stateless/02907_system_backups_profile_events.sh +++ b/tests/queries/0_stateless/02907_system_backups_profile_events.sh @@ -4,7 +4,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table if exists test; create table test (a Int32) engine = MergeTree() order by tuple(); " @@ -12,10 +12,10 @@ create table test (a Int32) engine = MergeTree() order by tuple(); backup_id=${CLICKHOUSE_TEST_UNIQUE_NAME} backup_name="Disk('backups', '$backup_id')"; -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " backup table ${CLICKHOUSE_DATABASE}.test to $backup_name; " | grep -o "BACKUP_CREATED" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " select ProfileEvents['BackupEntriesCollectorMicroseconds'] > 10 from system.backups where name='Disk(\'backups\', \'$backup_id\')' " diff --git a/tests/queries/0_stateless/02908_Npy_files_caching.sh b/tests/queries/0_stateless/02908_Npy_files_caching.sh index 4845f740972..218e13efb95 100755 --- a/tests/queries/0_stateless/02908_Npy_files_caching.sh +++ b/tests/queries/0_stateless/02908_Npy_files_caching.sh @@ -7,13 +7,13 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_LOCAL -q "select count() from file('$CURDIR/data_npy/one_dim.npy') settings optimize_count_from_files=0" $CLICKHOUSE_LOCAL -q "select count() from file('$CURDIR/data_npy/one_dim.npy') settings optimize_count_from_files=1" $CLICKHOUSE_LOCAL -q "select count() from file('$CURDIR/data_npy/one_dim.npy', auto, 'array Int64') settings optimize_count_from_files=1" -$CLICKHOUSE_LOCAL -nm -q " +$CLICKHOUSE_LOCAL -m -q " desc file('$CURDIR/data_npy/one_dim.npy'); select number_of_rows from system.schema_inference_cache where format='Npy'; " $CLICKHOUSE_LOCAL -q "select count() from file('$CURDIR/data_npy/npy_big.npy') settings optimize_count_from_files=0" $CLICKHOUSE_LOCAL -q "select count() from file('$CURDIR/data_npy/npy_big.npy') settings optimize_count_from_files=1" -$CLICKHOUSE_LOCAL -nm -q " +$CLICKHOUSE_LOCAL -m -q " desc file('$CURDIR/data_npy/npy_big.npy'); select number_of_rows from system.schema_inference_cache where format='Npy'; " diff --git a/tests/queries/0_stateless/02908_filesystem_cache_as_collection.reference b/tests/queries/0_stateless/02908_filesystem_cache_as_collection.reference index d4191af1594..41a60204eab 100644 --- a/tests/queries/0_stateless/02908_filesystem_cache_as_collection.reference +++ b/tests/queries/0_stateless/02908_filesystem_cache_as_collection.reference @@ -1,2 +1,2 @@ -1048576 10000000 33554432 4194304 0 0 0 0 /var/lib/clickhouse/filesystem_caches/collection_sql 0 5000 0 16 -1048576 10000000 33554432 4194304 0 0 0 0 /var/lib/clickhouse/filesystem_caches/collection 0 5000 0 16 +1048576 10000000 33554432 1 4194304 0 0 0 0 /var/lib/clickhouse/filesystem_caches/collection_sql 0 5000 0 16 +1048576 10000000 33554432 1 4194304 0 0 0 0 /var/lib/clickhouse/filesystem_caches/collection 0 5000 0 16 diff --git a/tests/queries/0_stateless/02908_filesystem_cache_as_collection.sql b/tests/queries/0_stateless/02908_filesystem_cache_as_collection.sql index c7216833bc9..127baa8304e 100644 --- a/tests/queries/0_stateless/02908_filesystem_cache_as_collection.sql +++ b/tests/queries/0_stateless/02908_filesystem_cache_as_collection.sql @@ -3,8 +3,8 @@ CREATE NAMED COLLECTION IF NOT EXISTS cache_collection_sql AS path = 'collection_sql', max_size = '1Mi'; DROP TABLE IF EXISTS test; CREATE TABLE test (a Int32, b String) -ENGINE = MergeTree() ORDER BY a SETTINGS disk = disk(type = cache, disk = 'local_disk', name = '$CLICHOUSE_TEST_UNIQUE_NAME', cache_name='cache_collection_sql'); +ENGINE = MergeTree() ORDER BY a SETTINGS disk = disk(type = cache, disk = 'local_disk', name = '$CLICHOUSE_TEST_UNIQUE_NAME', cache_name='cache_collection_sql', load_metadata_asynchronously = 0); DESCRIBE FILESYSTEM CACHE '$CLICHOUSE_TEST_UNIQUE_NAME'; CREATE TABLE test2 (a Int32, b String) -ENGINE = MergeTree() ORDER BY a SETTINGS disk = disk(type = cache, disk = 'local_disk', name = '$CLICHOUSE_TEST_UNIQUE_NAME_2', cache_name='cache_collection'); +ENGINE = MergeTree() ORDER BY a SETTINGS disk = disk(type = cache, disk = 'local_disk', name = '$CLICHOUSE_TEST_UNIQUE_NAME_2', cache_name='cache_collection', load_metadata_asynchronously = 0); DESCRIBE FILESYSTEM CACHE '$CLICHOUSE_TEST_UNIQUE_NAME_2'; diff --git a/tests/queries/0_stateless/02908_table_ttl_dependency.sh b/tests/queries/0_stateless/02908_table_ttl_dependency.sh index 70136b4a42b..0bc02426f61 100755 --- a/tests/queries/0_stateless/02908_table_ttl_dependency.sh +++ b/tests/queries/0_stateless/02908_table_ttl_dependency.sh @@ -6,7 +6,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " DROP TABLE IF EXISTS 02908_dependent; DROP TABLE IF EXISTS 02908_main; @@ -14,11 +14,11 @@ $CLICKHOUSE_CLIENT -nm -q " CREATE TABLE 02908_dependent (a UInt32, ts DateTime) ENGINE = MergeTree ORDER BY a TTL ts + 1 WHERE a IN (SELECT a FROM ${CLICKHOUSE_DATABASE}.02908_main); " -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " DROP TABLE 02908_main; " 2>&1 | grep -F -q "HAVE_DEPENDENT_OBJECTS" -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " DROP TABLE 02908_dependent; DROP TABLE 02908_main; " diff --git a/tests/queries/0_stateless/02909_settings_in_json_schema_cache.sh b/tests/queries/0_stateless/02909_settings_in_json_schema_cache.sh index 8da144f90ca..75d491642ea 100755 --- a/tests/queries/0_stateless/02909_settings_in_json_schema_cache.sh +++ b/tests/queries/0_stateless/02909_settings_in_json_schema_cache.sh @@ -5,7 +5,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh echo '{"x" : 42}' > $CLICKHOUSE_TEST_UNIQUE_NAME.json -$CLICKHOUSE_LOCAL -nm -q " +$CLICKHOUSE_LOCAL -m -q " DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.json') SETTINGS schema_inference_make_columns_nullable=1; DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.json') SETTINGS schema_inference_make_columns_nullable=0; SELECT count() from system.schema_inference_cache where format = 'JSON' and additional_format_info like '%schema_inference_make_columns_nullable%';" diff --git a/tests/queries/0_stateless/02910_object-json-crash-add-column.sql b/tests/queries/0_stateless/02910_object-json-crash-add-column.sql index bda5e958453..97672bf89c6 100644 --- a/tests/queries/0_stateless/02910_object-json-crash-add-column.sql +++ b/tests/queries/0_stateless/02910_object-json-crash-add-column.sql @@ -11,10 +11,10 @@ ORDER BY i; INSERT INTO test02910 (i, jString) SELECT 1, '{"a":"123"}'; -ALTER TABLE test02910 ADD COLUMN j2 Tuple(JSON) DEFAULT jString; -- { serverError SUPPORT_IS_DISABLED } -ALTER TABLE test02910 ADD COLUMN j2 Tuple(Float64, JSON); -- { serverError SUPPORT_IS_DISABLED } -ALTER TABLE test02910 ADD COLUMN j2 Tuple(Array(Tuple(JSON))) DEFAULT jString; -- { serverError SUPPORT_IS_DISABLED } -ALTER TABLE test02910 ADD COLUMN j2 JSON default jString; -- { serverError SUPPORT_IS_DISABLED } +ALTER TABLE test02910 ADD COLUMN j2 Tuple(Object('json')) DEFAULT jString; -- { serverError SUPPORT_IS_DISABLED } +ALTER TABLE test02910 ADD COLUMN j2 Tuple(Float64, Object('json')); -- { serverError SUPPORT_IS_DISABLED } +ALTER TABLE test02910 ADD COLUMN j2 Tuple(Array(Tuple(Object('json')))) DEFAULT jString; -- { serverError SUPPORT_IS_DISABLED } +ALTER TABLE test02910 ADD COLUMN j2 Object('json') default jString; -- { serverError SUPPORT_IS_DISABLED } -- If we would allow adding a column with dynamic subcolumns the subsequent select would crash the server. -- SELECT * FROM test02910; @@ -39,10 +39,10 @@ INSERT INTO test02910_second SELECT number, number, '2023-10-28 11:11:11.11111', INSERT INTO test02910_second SELECT number, number, '2023-10-28 11:11:11.11111', ['c', 'd'] FROM numbers(10); INSERT INTO test02910_second SELECT number, number, '2023-10-28 11:11:11.11111', [] FROM numbers(10); -ALTER TABLE test02910_second ADD COLUMN `tags_json` Tuple(JSON) DEFAULT jString; -- { serverError SUPPORT_IS_DISABLED } -ALTER TABLE test02910_second ADD COLUMN `tags_json` Tuple(Float64, JSON); -- { serverError SUPPORT_IS_DISABLED } -ALTER TABLE test02910_second ADD COLUMN `tags_json` Tuple(Array(Tuple(JSON))) DEFAULT jString; -- { serverError SUPPORT_IS_DISABLED } -ALTER TABLE test02910_second ADD COLUMN `tags_json` JSON; -- { serverError SUPPORT_IS_DISABLED } +ALTER TABLE test02910_second ADD COLUMN `tags_json` Tuple(Object('json')) DEFAULT jString; -- { serverError SUPPORT_IS_DISABLED } +ALTER TABLE test02910_second ADD COLUMN `tags_json` Tuple(Float64, Object('json')); -- { serverError SUPPORT_IS_DISABLED } +ALTER TABLE test02910_second ADD COLUMN `tags_json` Tuple(Array(Tuple(Object('json')))) DEFAULT jString; -- { serverError SUPPORT_IS_DISABLED } +ALTER TABLE test02910_second ADD COLUMN `tags_json` Object('json'); -- { serverError SUPPORT_IS_DISABLED } -- If we would allow adding a column with dynamic subcolumns the subsequent select would crash the server. -- SELECT * FROM test02910; diff --git a/tests/queries/0_stateless/02911_backup_restore_keeper_map.sh b/tests/queries/0_stateless/02911_backup_restore_keeper_map.sh index c04667505c3..01aba244a02 100755 --- a/tests/queries/0_stateless/02911_backup_restore_keeper_map.sh +++ b/tests/queries/0_stateless/02911_backup_restore_keeper_map.sh @@ -5,7 +5,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh database_name="$CLICKHOUSE_DATABASE"_02911_keeper_map -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " DROP DATABASE IF EXISTS $database_name; CREATE DATABASE $database_name; CREATE TABLE $database_name.02911_backup_restore_keeper_map1 (key UInt64, value String) Engine=KeeperMap('/' || currentDatabase() || '/test02911') PRIMARY KEY key; @@ -13,9 +13,9 @@ $CLICKHOUSE_CLIENT -nm -q " CREATE TABLE $database_name.02911_backup_restore_keeper_map3 (key UInt64, value String) Engine=KeeperMap('/' || currentDatabase() || '/test02911_different') PRIMARY KEY key; " -$CLICKHOUSE_CLIENT -nm -q "INSERT INTO $database_name.02911_backup_restore_keeper_map2 SELECT number, 'test' || toString(number) FROM system.numbers LIMIT 5000;" +$CLICKHOUSE_CLIENT -m -q "INSERT INTO $database_name.02911_backup_restore_keeper_map2 SELECT number, 'test' || toString(number) FROM system.numbers LIMIT 5000;" -$CLICKHOUSE_CLIENT -nm -q "INSERT INTO $database_name.02911_backup_restore_keeper_map3 SELECT number, 'test' || toString(number) FROM system.numbers LIMIT 3000;" +$CLICKHOUSE_CLIENT -m -q "INSERT INTO $database_name.02911_backup_restore_keeper_map3 SELECT number, 'test' || toString(number) FROM system.numbers LIMIT 3000;" backup_path="$database_name" for i in $(seq 1 3); do diff --git a/tests/queries/0_stateless/02915_input_table_function_in_subquery.sh b/tests/queries/0_stateless/02915_input_table_function_in_subquery.sh index 80e38338751..7ad38e11e96 100755 --- a/tests/queries/0_stateless/02915_input_table_function_in_subquery.sh +++ b/tests/queries/0_stateless/02915_input_table_function_in_subquery.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " CREATE TABLE IF NOT EXISTS ts_data_double_raw ( device_id UInt32 NOT NULL CODEC(ZSTD), diff --git a/tests/queries/0_stateless/02915_lazy_loading_of_base_backups.sh b/tests/queries/0_stateless/02915_lazy_loading_of_base_backups.sh index 5f0f41a956b..b6d6ca57768 100755 --- a/tests/queries/0_stateless/02915_lazy_loading_of_base_backups.sh +++ b/tests/queries/0_stateless/02915_lazy_loading_of_base_backups.sh @@ -13,40 +13,40 @@ b_backup="Disk('backups', '$b_backup_id')" c_backup_id=${CLICKHOUSE_TEST_UNIQUE_NAME}_c c_backup="Disk('backups', '$c_backup_id')" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " DROP TABLE IF EXISTS tbl1; DROP TABLE IF EXISTS tbl2; DROP TABLE IF EXISTS tbl3; " -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " CREATE TABLE tbl1 (a Int32) ENGINE = MergeTree() ORDER BY tuple(); " # The following BACKUP command must write backup 'a'. -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " BACKUP DATABASE ${CLICKHOUSE_DATABASE} TO $a_backup SETTINGS id='$a_backup_id'; " | grep -o "BACKUP_CREATED" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " CREATE TABLE tbl2 (a Int32) ENGINE = MergeTree() ORDER BY tuple(); " # The following BACKUP command must read backup 'a' and write backup 'b'. -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " BACKUP DATABASE ${CLICKHOUSE_DATABASE} TO $b_backup SETTINGS id='$b_backup_id', base_backup=$a_backup; " | grep -o "BACKUP_CREATED" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " CREATE TABLE tbl3 (a Int32) ENGINE = MergeTree() ORDER BY tuple(); " # The following BACKUP command must read only backup 'b' (and not 'a') and write backup 'c'. -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " BACKUP DATABASE ${CLICKHOUSE_DATABASE} TO $c_backup SETTINGS id='$c_backup_id', base_backup=$b_backup; " | grep -o "BACKUP_CREATED" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " DROP TABLE tbl1; DROP TABLE tbl2; DROP TABLE tbl3; @@ -57,28 +57,28 @@ r2_restore_id=${CLICKHOUSE_TEST_UNIQUE_NAME}_r2 r3_restore_id=${CLICKHOUSE_TEST_UNIQUE_NAME}_r3 # The following RESTORE command must read all 3 backups 'a', 'b', c' because the table 'tbl1' was in the first backup. -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " RESTORE TABLE ${CLICKHOUSE_DATABASE}.tbl1 FROM $c_backup SETTINGS id='$r1_restore_id'; " | grep -o "RESTORED" # The following RESTORE command must read only 2 backups 'b', c' (and not 'a') because the table 'tbl2' was in the second backup. -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " RESTORE TABLE ${CLICKHOUSE_DATABASE}.tbl2 FROM $c_backup SETTINGS id='$r2_restore_id'; " | grep -o "RESTORED" # The following RESTORE command must read only 1 backup 'c' (and not 'a' or 'b') because the table 'tbl3' was in the third backup. -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " RESTORE TABLE ${CLICKHOUSE_DATABASE}.tbl3 FROM $c_backup SETTINGS id='$r3_restore_id'; " | grep -o "RESTORED" all_ids="['$a_backup_id', '$b_backup_id', '$c_backup_id', '$r1_restore_id', '$r2_restore_id', '$r3_restore_id']" id_prefix_len=`expr "${CLICKHOUSE_TEST_UNIQUE_NAME}_" : '.*'` -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " SELECT substr(id, 1 + $id_prefix_len) as short_id, ProfileEvents['BackupsOpenedForRead'], ProfileEvents['BackupsOpenedForWrite'] FROM system.backups WHERE id IN ${all_ids} ORDER BY short_id " -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " DROP TABLE tbl1; DROP TABLE tbl2; DROP TABLE tbl3; diff --git a/tests/queries/0_stateless/02915_sleep_large_uint.sql b/tests/queries/0_stateless/02915_sleep_large_uint.sql index f7c04ab6d1f..08b6c580a28 100644 --- a/tests/queries/0_stateless/02915_sleep_large_uint.sql +++ b/tests/queries/0_stateless/02915_sleep_large_uint.sql @@ -1,6 +1,7 @@ SELECT sleep(3.40282e+44); -- { serverError BAD_ARGUMENTS } SELECT sleep((pow(2, 64) / 1000000) - 1); -- { serverError BAD_ARGUMENTS } SELECT sleepEachRow(184467440737095516) from numbers(10000); -- { serverError BAD_ARGUMENTS } +SET max_rows_to_read = 0; SELECT sleepEachRow(pow(2, 31)) from numbers(9007199254740992) settings function_sleep_max_microseconds_per_block = 8589934592000000000; -- { serverError TOO_SLOW } -- Another corner case, but it requires lots of memory to run (huge block size) diff --git a/tests/queries/0_stateless/02916_dictionary_access.sh b/tests/queries/0_stateless/02916_dictionary_access.sh index 08ee517ab3b..be62cc027ef 100755 --- a/tests/queries/0_stateless/02916_dictionary_access.sh +++ b/tests/queries/0_stateless/02916_dictionary_access.sh @@ -7,7 +7,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) username="user_${CLICKHOUSE_TEST_UNIQUE_NAME}" dictname="dict_${CLICKHOUSE_TEST_UNIQUE_NAME}" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " CREATE DICTIONARY IF NOT EXISTS ${dictname} ( id UInt64, @@ -23,15 +23,15 @@ ${CLICKHOUSE_CLIENT} -nm --query " SELECT dictGet(${dictname}, 'value', 1); " -$CLICKHOUSE_CLIENT -nm --user="${username}" --query " +$CLICKHOUSE_CLIENT -m --user="${username}" --query " SELECT * FROM dictionary(${dictname}); " 2>&1 | grep -o ACCESS_DENIED | uniq -$CLICKHOUSE_CLIENT -nm --user="${username}" --query " +$CLICKHOUSE_CLIENT -m --user="${username}" --query " SELECT dictGet(${dictname}, 'value', 1); " 2>&1 | grep -o ACCESS_DENIED | uniq -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " DROP DICTIONARY IF EXISTS ${dictname}; DROP USER IF EXISTS ${username}; " diff --git a/tests/queries/0_stateless/02916_glogal_in_cancel.sql b/tests/queries/0_stateless/02916_glogal_in_cancel.sql index ad54f1ecdec..dd61795947a 100644 --- a/tests/queries/0_stateless/02916_glogal_in_cancel.sql +++ b/tests/queries/0_stateless/02916_glogal_in_cancel.sql @@ -1,2 +1,2 @@ -set max_execution_time = 0.5, timeout_overflow_mode = 'break'; +set max_execution_time = 0.5, timeout_overflow_mode = 'break', max_rows_to_read = 0; SELECT number FROM remote('127.0.0.{3|2}', numbers(1)) WHERE number GLOBAL IN (SELECT number FROM numbers(10000000000.)) format Null; diff --git a/tests/queries/0_stateless/02916_joinget_dependency.sh b/tests/queries/0_stateless/02916_joinget_dependency.sh index 6477ae8c967..ff9332cb57f 100755 --- a/tests/queries/0_stateless/02916_joinget_dependency.sh +++ b/tests/queries/0_stateless/02916_joinget_dependency.sh @@ -6,7 +6,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # We test the dependency on the DROP -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " DROP TABLE IF EXISTS Sub_distributed; DROP TABLE IF EXISTS Sub; DROP TABLE IF EXISTS Mapping; @@ -20,8 +20,8 @@ $CLICKHOUSE_CLIENT -q " DROP TABLE Mapping; " 2>&1 | grep -cm1 "HAVE_DEPENDENT_OBJECTS" -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " DROP TABLE Sub_distributed; DROP TABLE Sub; DROP TABLE Mapping; -" \ No newline at end of file +" diff --git a/tests/queries/0_stateless/02930_client_file_log_comment.sh b/tests/queries/0_stateless/02930_client_file_log_comment.sh index 50cd587e4b5..393bffcaf59 100755 --- a/tests/queries/0_stateless/02930_client_file_log_comment.sh +++ b/tests/queries/0_stateless/02930_client_file_log_comment.sh @@ -14,7 +14,7 @@ echo -n 'select 4242' >> "$file2" $CLICKHOUSE_CLIENT --queries-file "$file1" "$file2" <<<'select 42' $CLICKHOUSE_CLIENT --log_comment foo --queries-file /dev/stdin <<<'select 424242' -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " system flush logs; select query, log_comment from system.query_log where current_database = '$CLICKHOUSE_DATABASE' and event_date >= yesterday() and query = 'select 42' and type != 'QueryStart'; select query, log_comment from system.query_log where current_database = '$CLICKHOUSE_DATABASE' and event_date >= yesterday() and query = 'select 4242' and type != 'QueryStart'; diff --git a/tests/queries/0_stateless/02931_size_virtual_column_use_structure_from_insertion_table.sh b/tests/queries/0_stateless/02931_size_virtual_column_use_structure_from_insertion_table.sh index d9e4a2c8f8b..76ab56a4570 100755 --- a/tests/queries/0_stateless/02931_size_virtual_column_use_structure_from_insertion_table.sh +++ b/tests/queries/0_stateless/02931_size_virtual_column_use_structure_from_insertion_table.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh echo "1,2" > $CLICKHOUSE_TEST_UNIQUE_NAME.csv -$CLICKHOUSE_LOCAL -nm -q " +$CLICKHOUSE_LOCAL -m -q " create table test (x UInt64, y UInt32, size UInt64) engine=Memory; insert into test select c1, c2, _size from file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv') settings use_structure_from_insertion_table_in_table_functions=1; select * from test; diff --git a/tests/queries/0_stateless/02932_analyzer_rewrite_sum_column_and_constant.reference b/tests/queries/0_stateless/02932_analyzer_rewrite_sum_column_and_constant.reference index 802d920aaef..b41635f014e 100644 --- a/tests/queries/0_stateless/02932_analyzer_rewrite_sum_column_and_constant.reference +++ b/tests/queries/0_stateless/02932_analyzer_rewrite_sum_column_and_constant.reference @@ -1635,21 +1635,21 @@ QUERY id: 0 JOIN TREE TABLE id: 10, alias: __table1, table_name: default.test_table SELECT sum(float64 + 2) From test_table; -26.5 +26.875 SELECT sum(2 + float64) From test_table; -26.5 +26.875 SELECT sum(float64 - 2) From test_table; -6.5 +6.875 SELECT sum(2 - float64) From test_table; --6.5 +-6.875 SELECT sum(float64) + 2 * count(float64) From test_table; -26.5 +26.875 SELECT 2 * count(float64) + sum(float64) From test_table; -26.5 +26.875 SELECT sum(float64) - 2 * count(float64) From test_table; -6.5 +6.875 SELECT 2 * count(float64) - sum(float64) From test_table; --6.5 +-6.875 EXPLAIN QUERY TREE (SELECT sum(float64 + 2) From test_table); QUERY id: 0 PROJECTION COLUMNS @@ -2463,25 +2463,25 @@ QUERY id: 0 JOIN TREE TABLE id: 12, alias: __table1, table_name: default.test_table SELECT sum(float64 + 2) + sum(float64 + 3) From test_table; -58 +58.75 SELECT sum(float64 + 2) - sum(float64 + 3) From test_table; -5 SELECT sum(float64 - 2) + sum(float64 - 3) From test_table; -8 +8.75 SELECT sum(float64 - 2) - sum(float64 - 3) From test_table; 5 SELECT sum(2 - float64) - sum(3 - float64) From test_table; -5 SELECT (sum(float64) + 2 * count(float64)) + (sum(float64) + 3 * count(float64)) From test_table; -58 +58.75 SELECT (sum(float64) + 2 * count(float64)) - (sum(float64) + 3 * count(float64)) From test_table; -5 SELECT (sum(float64) - 2 * count(float64)) + (sum(float64) - 3 * count(float64)) From test_table; -8 +8.75 SELECT (sum(float64) - 2 * count(float64)) - (sum(float64) - 3 * count(float64)) From test_table; 5 SELECT (2 * count(float64) - sum(float64)) + (3 * count(float64) - sum(float64)) From test_table; --8 +-8.75 EXPLAIN QUERY TREE (SELECT sum(float64 + 2) + sum(float64 + 3) From test_table); QUERY id: 0 PROJECTION COLUMNS diff --git a/tests/queries/0_stateless/02932_analyzer_rewrite_sum_column_and_constant.sql b/tests/queries/0_stateless/02932_analyzer_rewrite_sum_column_and_constant.sql index 5492d061c12..b6fa097abe9 100644 --- a/tests/queries/0_stateless/02932_analyzer_rewrite_sum_column_and_constant.sql +++ b/tests/queries/0_stateless/02932_analyzer_rewrite_sum_column_and_constant.sql @@ -25,11 +25,12 @@ CREATE TABLE test_table decimal32 Decimal32(5), ) ENGINE=MergeTree ORDER BY uint64; -INSERT INTO test_table VALUES (1, 1.1, 1.11); -INSERT INTO test_table VALUES (2, 2.2, 2.22); -INSERT INTO test_table VALUES (3, 3.3, 3.33); -INSERT INTO test_table VALUES (4, 4.4, 4.44); -INSERT INTO test_table VALUES (5, 5.5, 5.55); +-- Use Float64 numbers divisible by 1/16 (or some other small power of two), so that their sum doesn't depend on summation order. +INSERT INTO test_table VALUES (1, 1.125, 1.11); +INSERT INTO test_table VALUES (2, 2.250, 2.22); +INSERT INTO test_table VALUES (3, 3.375, 3.33); +INSERT INTO test_table VALUES (4, 4.500, 4.44); +INSERT INTO test_table VALUES (5, 5.625, 5.55); -- { echoOn } SELECT sum(uint64 + 1 AS i) from test_table where i > 0; diff --git a/tests/queries/0_stateless/02932_refreshable_materialized_views_1.reference b/tests/queries/0_stateless/02932_refreshable_materialized_views_1.reference new file mode 100644 index 00000000000..bfc6add90a7 --- /dev/null +++ b/tests/queries/0_stateless/02932_refreshable_materialized_views_1.reference @@ -0,0 +1,32 @@ +<1: created view> a [] 1 +CREATE MATERIALIZED VIEW default.a\nREFRESH AFTER 2 SECOND\n(\n `x` UInt64\n)\nENGINE = Memory\nAS SELECT number AS x\nFROM numbers(2)\nUNION ALL\nSELECT rand64() AS x +<2: refreshed> 3 1 1 +<3: time difference at least> 1000 +<4: next refresh in> 2 +<4.1: fake clock> Scheduled 2050-01-01 00:00:01 2050-01-01 00:00:03 +<4.5: altered> Scheduled Finished 2052-01-01 00:00:00 +CREATE MATERIALIZED VIEW default.a\nREFRESH EVERY 2 YEAR\n(\n `x` UInt64\n)\nENGINE = Memory\nAS SELECT x * 2 AS x\nFROM default.src +<5: no refresh> 3 +<6: refreshed> 2 +<7: refreshed> Scheduled Finished 2054-01-01 00:00:00 +CREATE MATERIALIZED VIEW default.b\nREFRESH EVERY 2 YEAR DEPENDS ON default.a\n(\n `y` Int32\n)\nENGINE = MergeTree\nORDER BY y\nSETTINGS index_granularity = 8192\nAS SELECT x * 10 AS y\nFROM default.a +<7.5: created dependent> 2052-11-11 11:11:11 +<8: refreshed> 20 +<9: refreshed> a Scheduled Finished 2054-01-01 00:00:00 +<9: refreshed> b Scheduled Finished 2054-01-01 00:00:00 +<9.2: dropping> 0 2 +<9.4: dropped> 0 2 +<10: creating> a Scheduled [] 2054-01-01 00:00:00 +<10: creating> b WaitingForDependencies ['default.a'] 2054-01-01 00:00:00 +<11: chain-refreshed a> 4 +<12: chain-refreshed b> 40 +<13: chain-refreshed> a Scheduled [] Finished 2054-01-01 00:00:01 2056-01-01 00:00:00 1 +<13: chain-refreshed> b Scheduled ['default.a'] Finished 2054-01-24 23:22:21 2056-01-01 00:00:00 1 +<14: waiting for next cycle> a Scheduled [] 2058-01-01 00:00:00 +<14: waiting for next cycle> b WaitingForDependencies ['default.a'] 2060-01-01 00:00:00 +<15: chain-refreshed a> 6 +<16: chain-refreshed b> 60 +<17: chain-refreshed> a Scheduled 2062-01-01 00:00:00 +<17: chain-refreshed> b Scheduled 2062-01-01 00:00:00 +<18: removed dependency> b Scheduled [] 2062-03-03 03:03:03 2064-01-01 00:00:00 5 +CREATE MATERIALIZED VIEW default.b\nREFRESH EVERY 2 YEAR\n(\n `y` Int32\n)\nENGINE = MergeTree\nORDER BY y\nSETTINGS index_granularity = 8192\nAS SELECT x * 10 AS y\nFROM default.a diff --git a/tests/queries/0_stateless/02932_refreshable_materialized_views_1.sh b/tests/queries/0_stateless/02932_refreshable_materialized_views_1.sh new file mode 100755 index 00000000000..2b92a113e91 --- /dev/null +++ b/tests/queries/0_stateless/02932_refreshable_materialized_views_1.sh @@ -0,0 +1,177 @@ +#!/usr/bin/env bash +# Tags: atomic-database + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# Set session timezone to UTC to make all DateTime formatting and parsing use UTC, because refresh +# scheduling is done in UTC. +CLICKHOUSE_CLIENT="`echo "$CLICKHOUSE_CLIENT" | sed 's/--session_timezone[= ][^ ]*//g'`" +CLICKHOUSE_CLIENT="`echo "$CLICKHOUSE_CLIENT --allow_experimental_refreshable_materialized_view=1 --session_timezone Etc/UTC"`" + +$CLICKHOUSE_CLIENT -nq "create view refreshes as select * from system.view_refreshes where database = '$CLICKHOUSE_DATABASE' order by view" + + +# Basic refreshing. +$CLICKHOUSE_CLIENT -nq " + create materialized view a + refresh after 2 second + engine Memory + empty + as select number as x from numbers(2) union all select rand64() as x; + select '<1: created view>', view, remaining_dependencies, exception, last_refresh_result in ('Unknown', 'Finished') from refreshes; + show create a;" +# Wait for any refresh. (xargs trims the string and turns \t and \n into spaces) +while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes -- $LINENO" | xargs`" == 'Unknown' ] +do + sleep 0.5 +done +start_time="`$CLICKHOUSE_CLIENT -nq "select reinterpret(now64(), 'Int64')"`" +# Check table contents. +$CLICKHOUSE_CLIENT -nq "select '<2: refreshed>', count(), sum(x=0), sum(x=1) from a" +# Wait for table contents to change. +res1="`$CLICKHOUSE_CLIENT -nq 'select * from a order by x format Values'`" +while : +do + res2="`$CLICKHOUSE_CLIENT -nq 'select * from a order by x format Values -- $LINENO'`" + [ "$res2" == "$res1" ] || break + sleep 0.5 +done +# Wait for another change. +while : +do + res3="`$CLICKHOUSE_CLIENT -nq 'select * from a order by x format Values -- $LINENO'`" + [ "$res3" == "$res2" ] || break + sleep 0.5 +done +# Check that the two changes were at least 1 second apart, in particular that we're not refreshing +# like crazy. This is potentially flaky, but we need at least one test that uses non-mocked timer +# to make sure the clock+timer code works at all. If it turns out flaky, increase refresh period above. +$CLICKHOUSE_CLIENT -nq " + select '<3: time difference at least>', min2(reinterpret(now64(), 'Int64') - $start_time, 1000); + select '<4: next refresh in>', next_refresh_time-last_refresh_time from refreshes;" + +# Create a source table from which views will read. +$CLICKHOUSE_CLIENT -nq " + create table src (x Int8) engine Memory as select 1;" + +# Switch to fake clock, change refresh schedule, change query. +$CLICKHOUSE_CLIENT -nq " + system test view a set fake time '2050-01-01 00:00:01'; + system wait view a; + system refresh view a; + system wait view a; + select '<4.1: fake clock>', status, last_refresh_time, next_refresh_time from refreshes; + alter table a modify refresh every 2 year; + alter table a modify query select x*2 as x from src; + select '<4.5: altered>', status, last_refresh_result, next_refresh_time from refreshes; + show create a;" +# Advance time to trigger the refresh. +$CLICKHOUSE_CLIENT -nq " + select '<5: no refresh>', count() from a; + system test view a set fake time '2052-02-03 04:05:06';" +while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_time from refreshes -- $LINENO" | xargs`" != '2052-02-03 04:05:06' ] +do + sleep 0.5 +done +$CLICKHOUSE_CLIENT -nq " + select '<6: refreshed>', * from a; + select '<7: refreshed>', status, last_refresh_result, next_refresh_time from refreshes;" + +# Create a dependent view, refresh it once. +$CLICKHOUSE_CLIENT -nq " + create materialized view b refresh every 2 year depends on a (y Int32) engine MergeTree order by y empty as select x*10 as y from a; + show create b; + system test view b set fake time '2052-11-11 11:11:11'; + system refresh view b; + system wait view b; + select '<7.5: created dependent>', last_refresh_time from refreshes where view = 'b';" +# Next refresh shouldn't start until the dependency refreshes. +$CLICKHOUSE_CLIENT -nq " + select '<8: refreshed>', * from b; + select '<9: refreshed>', view, status, last_refresh_result, next_refresh_time from refreshes; + system test view b set fake time '2054-01-24 23:22:21';" +while [ "`$CLICKHOUSE_CLIENT -nq "select status, next_refresh_time from refreshes where view = 'b' -- $LINENO" | xargs`" != 'WaitingForDependencies 2054-01-01 00:00:00' ] +do + sleep 0.5 +done + +# Drop the source table, check that refresh fails and doesn't leave a temp table behind. +$CLICKHOUSE_CLIENT -nq " + select '<9.2: dropping>', countIf(name like '%tmp%'), countIf(name like '%.inner%') from system.tables where database = currentDatabase(); + drop table src; + system refresh view a;" +$CLICKHOUSE_CLIENT -nq "system wait view a;" 2>/dev/null && echo "SYSTEM WAIT VIEW failed to fail at $LINENO" +$CLICKHOUSE_CLIENT -nq " + select '<9.4: dropped>', countIf(name like '%tmp%'), countIf(name like '%.inner%') from system.tables where database = currentDatabase();" + +# Create the source table again, check that refresh succeeds (in particular that tables are looked +# up by name rather than uuid). +$CLICKHOUSE_CLIENT -nq " + select '<10: creating>', view, status, remaining_dependencies, next_refresh_time from refreshes; + create table src (x Int16) engine Memory as select 2; + system test view a set fake time '2054-01-01 00:00:01';" +while [ "`$CLICKHOUSE_CLIENT -nq "select status from refreshes where view = 'b' -- $LINENO" | xargs`" != 'Scheduled' ] +do + sleep 0.5 +done +# Both tables should've refreshed. +$CLICKHOUSE_CLIENT -nq " + select '<11: chain-refreshed a>', * from a; + select '<12: chain-refreshed b>', * from b; + select '<13: chain-refreshed>', view, status, remaining_dependencies, last_refresh_result, last_refresh_time, next_refresh_time, exception == '' from refreshes;" + +# Make the dependent table run ahead by one refresh cycle, make sure it waits for the dependency to +# catch up to the same cycle. +$CLICKHOUSE_CLIENT -nq " + system test view b set fake time '2059-01-01 00:00:00'; + system refresh view b;" +while [ "`$CLICKHOUSE_CLIENT -nq "select next_refresh_time from refreshes where view = 'b' -- $LINENO" | xargs`" != '2060-01-01 00:00:00' ] +do + sleep 0.5 +done +$CLICKHOUSE_CLIENT -nq " + system test view b set fake time '2061-01-01 00:00:00'; + system test view a set fake time '2057-01-01 00:00:00';" +while [ "`$CLICKHOUSE_CLIENT -nq "select status, next_refresh_time from refreshes -- $LINENO" | xargs`" != 'Scheduled 2058-01-01 00:00:00 WaitingForDependencies 2060-01-01 00:00:00' ] +do + sleep 0.5 +done + +$CLICKHOUSE_CLIENT -nq " + select '<14: waiting for next cycle>', view, status, remaining_dependencies, next_refresh_time from refreshes; + truncate src; + insert into src values (3); + system test view a set fake time '2060-02-02 02:02:02';" +while [ "`$CLICKHOUSE_CLIENT -nq "select next_refresh_time from refreshes where view = 'b' -- $LINENO" | xargs`" != '2062-01-01 00:00:00' ] +do + sleep 0.5 +done +$CLICKHOUSE_CLIENT -nq " + select '<15: chain-refreshed a>', * from a; + select '<16: chain-refreshed b>', * from b; + select '<17: chain-refreshed>', view, status, next_refresh_time from refreshes;" + +# Get to WaitingForDependencies state and remove the depencency. +$CLICKHOUSE_CLIENT -nq " + system test view b set fake time '2062-03-03 03:03:03'" +while [ "`$CLICKHOUSE_CLIENT -nq "select status from refreshes where view = 'b' -- $LINENO" | xargs`" != 'WaitingForDependencies' ] +do + sleep 0.5 +done +$CLICKHOUSE_CLIENT -nq " + alter table b modify refresh every 2 year" +while [ "`$CLICKHOUSE_CLIENT -nq "select status, last_refresh_time from refreshes where view = 'b' -- $LINENO" | xargs`" != 'Scheduled 2062-03-03 03:03:03' ] +do + sleep 0.5 +done +$CLICKHOUSE_CLIENT -nq " + select '<18: removed dependency>', view, status, remaining_dependencies, last_refresh_time,next_refresh_time, refresh_count from refreshes where view = 'b'; + show create b;" + +$CLICKHOUSE_CLIENT -nq " + drop table src; + drop table a; + drop table b; + drop table refreshes;" diff --git a/tests/queries/0_stateless/02932_refreshable_materialized_views_2.reference b/tests/queries/0_stateless/02932_refreshable_materialized_views_2.reference new file mode 100644 index 00000000000..cdaad32de0a --- /dev/null +++ b/tests/queries/0_stateless/02932_refreshable_materialized_views_2.reference @@ -0,0 +1,30 @@ +<19: exception> 1 +<20: unexception> 1 +<21: rename> 1 +<22: rename> d Finished +<23: simple refresh> 1 +<24: rename during refresh> 1 +<25: rename during refresh> f Running +<27: cancelled> f Scheduled Cancelled +<28: drop during refresh> 0 0 +CREATE MATERIALIZED VIEW default.g\nREFRESH EVERY 1 WEEK OFFSET 3 DAY 4 HOUR RANDOMIZE FOR 4 DAY 1 HOUR\n(\n `x` Int64\n)\nENGINE = Memory\nAS SELECT 42 AS x +<29: randomize> 1 1 +CREATE MATERIALIZED VIEW default.h\nREFRESH EVERY 1 SECOND TO default.dest\n(\n `x` Int64\n)\nAS SELECT x * 10 AS x\nFROM default.src +<30: to existing table> 10 +<31: to existing table> 10 +<31: to existing table> 20 +<31.5: will retry> Error 1 +<31.6: did retry> 10 +<32: empty> i Scheduled Unknown 0 +<32: empty> j Scheduled Finished 0 +<34: append> 10 +<35: append> 10 +<35: append> 20 +<35: append> 30 +<36: not append> 20 +<36: not append> 30 +<37: append chain> 100 +<38: append chain> 100 +<38: append chain> 100 +<38: append chain> 200 +creating MergeTree without ORDER BY failed, as expected diff --git a/tests/queries/0_stateless/02932_refreshable_materialized_views_2.sh b/tests/queries/0_stateless/02932_refreshable_materialized_views_2.sh new file mode 100755 index 00000000000..50a905576d5 --- /dev/null +++ b/tests/queries/0_stateless/02932_refreshable_materialized_views_2.sh @@ -0,0 +1,222 @@ +#!/usr/bin/env bash +# Tags: atomic-database + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# Set session timezone to UTC to make all DateTime formatting and parsing use UTC, because refresh +# scheduling is done in UTC. +CLICKHOUSE_CLIENT="`echo "$CLICKHOUSE_CLIENT" | sed 's/--session_timezone[= ][^ ]*//g'`" +CLICKHOUSE_CLIENT="`echo "$CLICKHOUSE_CLIENT --allow_experimental_refreshable_materialized_view=1 --allow_materialized_view_with_bad_select=0 --session_timezone Etc/UTC"`" + +$CLICKHOUSE_CLIENT -nq "create view refreshes as select * from system.view_refreshes where database = '$CLICKHOUSE_DATABASE' order by view" + + +# Select from a table that doesn't exist, get an exception. +$CLICKHOUSE_CLIENT -nq " + create table src (x Int8) engine Memory as select 1; + create materialized view c refresh every 1 second (x Int64) engine Memory empty as select * from src; + drop table src;" +while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes where view = 'c' -- $LINENO" | xargs`" != 'Error' ] +do + sleep 0.5 +done +# Check exception, create src, expect successful refresh. +$CLICKHOUSE_CLIENT -nq " + select '<19: exception>', exception ilike '%UNKNOWN_TABLE%' ? '1' : exception from refreshes where view = 'c'; + create table src (x Int64) engine Memory as select 1; + system refresh view c;" +while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes -- $LINENO" | xargs`" != 'Finished' ] +do + sleep 0.5 +done +# Rename table. +$CLICKHOUSE_CLIENT -nq " + select '<20: unexception>', * from c; + rename table c to d; + select '<21: rename>', * from d; + select '<22: rename>', view, last_refresh_result from refreshes;" + +# Do various things during a refresh. +# First make a nonempty view. +$CLICKHOUSE_CLIENT -nq " + drop table d; + truncate src; + insert into src values (1); + create materialized view e refresh every 1 second (x Int64) engine MergeTree order by x empty as select x + sleepEachRow(1) as x from src settings max_block_size = 1;" +while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes -- $LINENO" | xargs`" != 'Finished' ] +do + sleep 0.5 +done +# Stop refreshes. +$CLICKHOUSE_CLIENT -nq " + select '<23: simple refresh>', * from e; + system stop view e;" +while [ "`$CLICKHOUSE_CLIENT -nq "select status from refreshes -- $LINENO" | xargs`" != 'Disabled' ] +do + sleep 0.5 +done +# Make refreshes slow, wait for a slow refresh to start. (We stopped refreshes first to make sure +# we wait for a slow refresh, not a previous fast one.) +$CLICKHOUSE_CLIENT -nq " + insert into src select * from numbers(1000) settings max_block_size=1; + system start view e;" +while [ "`$CLICKHOUSE_CLIENT -nq "select status from refreshes -- $LINENO" | xargs`" != 'Running' ] +do + sleep 0.5 +done +# Rename. +$CLICKHOUSE_CLIENT -nq " + rename table e to f; + select '<24: rename during refresh>', * from f; + select '<25: rename during refresh>', view, status from refreshes where view = 'f'; + alter table f modify refresh after 10 year;" + +# Cancel. +$CLICKHOUSE_CLIENT -nq " + system cancel view f;" +while [ "`$CLICKHOUSE_CLIENT -nq "select status from refreshes where view = 'f' -- $LINENO" | xargs`" != 'Scheduled' ] +do + sleep 0.5 +done +# Check that another refresh doesn't immediately start after the cancelled one. +$CLICKHOUSE_CLIENT -nq " + select '<27: cancelled>', view, status, last_refresh_result from refreshes where view = 'f'; + system refresh view f;" +while [ "`$CLICKHOUSE_CLIENT -nq "select status from refreshes where view = 'f' -- $LINENO" | xargs`" != 'Running' ] +do + sleep 0.5 +done +# Drop. +$CLICKHOUSE_CLIENT -nq " + drop table f; + select '<28: drop during refresh>', view, status from refreshes; + select '<28: drop during refresh>', countIf(name like '%tmp%'), countIf(name like '%.inner%') from system.tables where database = currentDatabase()" + +# Try OFFSET and RANDOMIZE FOR. +$CLICKHOUSE_CLIENT -nq " + create materialized view g refresh every 1 week offset 3 day 4 hour randomize for 4 day 1 hour (x Int64) engine Memory empty as select 42 as x; + show create g; + system test view g set fake time '2050-02-03 15:30:13';" +while [ "`$CLICKHOUSE_CLIENT -nq "select next_refresh_time > '2049-01-01' from refreshes -- $LINENO" | xargs`" != '1' ] +do + sleep 0.5 +done +$CLICKHOUSE_CLIENT -nq " + with '2050-02-10 04:00:00'::DateTime as expected + select '<29: randomize>', abs(next_refresh_time::Int64 - expected::Int64) <= 3600*(24*4+1), next_refresh_time != expected from refreshes;" + +# Send data 'TO' an existing table. +$CLICKHOUSE_CLIENT -nq " + drop table g; + create table dest (x Int64) engine MergeTree order by x; + truncate src; + insert into src values (1); + create materialized view h refresh every 1 second to dest empty as select x*10 as x from src; + show create h;" +while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes -- $LINENO" | xargs`" != 'Finished' ] +do + sleep 0.5 +done +$CLICKHOUSE_CLIENT -nq " + select '<30: to existing table>', * from dest; + insert into src values (2);" +while [ "`$CLICKHOUSE_CLIENT -nq "select count() from dest -- $LINENO" | xargs`" != '2' ] +do + sleep 0.5 +done +$CLICKHOUSE_CLIENT -nq " + select '<31: to existing table>', * from dest; + drop table dest; + drop table h;" + +# Retries. +$CLICKHOUSE_CLIENT -nq " + create materialized view h2 refresh after 1 year settings refresh_retries = 10 (x Int64) engine Memory as select x*10 + throwIf(x % 2 == 0) as x from src;" +$CLICKHOUSE_CLIENT -nq "system wait view h2;" 2>/dev/null && echo "SYSTEM WAIT VIEW failed to fail at $LINENO" +$CLICKHOUSE_CLIENT -nq " + select '<31.5: will retry>', last_refresh_result, retry > 0 from refreshes; + create table src2 (x Int8) engine Memory; + insert into src2 values (1); + exchange tables src and src2; + drop table src2;" +while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result, retry from refreshes -- $LINENO" | xargs`" != 'Finished 0' ] +do + sleep 0.5 +done +$CLICKHOUSE_CLIENT -nq " + select '<31.6: did retry>', x from h2; + drop table h2" + +# EMPTY +$CLICKHOUSE_CLIENT -nq " + create materialized view i refresh after 1 year engine Memory empty as select number as x from numbers(2); + create materialized view j refresh after 1 year engine Memory as select number as x from numbers(2);" +while [ "`$CLICKHOUSE_CLIENT -nq "select sum(last_success_time is null) from refreshes -- $LINENO" | xargs`" == '2' ] +do + sleep 0.5 +done +$CLICKHOUSE_CLIENT -nq " + select '<32: empty>', view, status, last_refresh_result, retry from refreshes order by view; + drop table i; + drop table j;" + +# APPEND +$CLICKHOUSE_CLIENT -nq " + create materialized view k refresh every 10 year append (x Int64) engine Memory empty as select x*10 as x from src; + select '<33: append>', * from k; + system refresh view k; + system wait view k; + select '<34: append>', * from k; + truncate table src; + insert into src values (2), (3); + system refresh view k; + system wait view k; + select '<35: append>', * from k order by x;" +# ALTER to non-APPEND +$CLICKHOUSE_CLIENT -nq " + alter table k modify refresh every 10 year; + system wait view k; + system refresh view k; + system wait view k; + select '<36: not append>', * from k order by x; + drop table k; + truncate table src;" + +# APPEND + TO + regular materialized view reading from it. +$CLICKHOUSE_CLIENT -nq " + create table mid (x Int64) engine MergeTree order by x; + create materialized view l refresh every 10 year append to mid empty as select x*10 as x from src; + create materialized view m (x Int64) engine Memory as select x*10 as x from mid; + insert into src values (1); + system refresh view l; + system wait view l; + select '<37: append chain>', * from m; + insert into src values (2); + system refresh view l; + system wait view l; + select '<38: append chain>', * from m order by x; + drop table l; + drop table m; + drop table mid;" + +# Failing to create inner table. +$CLICKHOUSE_CLIENT -nq " + create materialized view n refresh every 1 second (x Int64) engine MergeTree as select 1 as x from numbers(2);" 2>/dev/null || echo "creating MergeTree without ORDER BY failed, as expected" +$CLICKHOUSE_CLIENT -nq " + create materialized view n refresh every 1 second (x Int64) engine MergeTree order by x as select 1 as x from numbers(2); + drop table n;" + +# Reading from table that doesn't exist yet. +$CLICKHOUSE_CLIENT -nq " + create materialized view o refresh every 1 second (x Int64) engine Memory as select x from nonexist; -- { serverError UNKNOWN_TABLE } + create materialized view o (x Int64) engine Memory as select x from nonexist; -- { serverError UNKNOWN_TABLE } + create materialized view o (x Int64) engine Memory as select x from nope.nonexist; -- { serverError UNKNOWN_DATABASE } + create materialized view o refresh every 1 second (x Int64) engine Memory as select x from nope.nonexist settings allow_materialized_view_with_bad_select = 1; + drop table o;" + +$CLICKHOUSE_CLIENT -nq " + drop table refreshes;" diff --git a/tests/queries/0_stateless/02933_change_cache_setting_without_restart.reference b/tests/queries/0_stateless/02933_change_cache_setting_without_restart.reference index 17a25d82824..0f64d0393b2 100644 --- a/tests/queries/0_stateless/02933_change_cache_setting_without_restart.reference +++ b/tests/queries/0_stateless/02933_change_cache_setting_without_restart.reference @@ -1,7 +1,7 @@ -134217728 10000000 33554432 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 0 0 0 16 -134217728 10000000 33554432 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 10 1000 0 16 -134217728 10000000 33554432 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 5 1000 0 16 -134217728 10000000 33554432 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 15 1000 0 16 -134217728 10000000 33554432 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 2 1000 0 16 -134217728 10000000 33554432 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 0 1000 0 16 -134217728 10000000 33554432 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 0 0 0 16 +134217728 10000000 33554432 1 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 0 0 0 16 +134217728 10000000 33554432 1 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 10 1000 0 16 +134217728 10000000 33554432 1 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 5 1000 0 16 +134217728 10000000 33554432 1 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 15 1000 0 16 +134217728 10000000 33554432 1 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 2 1000 0 16 +134217728 10000000 33554432 1 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 0 1000 0 16 +134217728 10000000 33554432 1 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 0 0 0 16 diff --git a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh index b587549cb60..2b78746ae2c 100755 --- a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh +++ b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh @@ -12,15 +12,15 @@ ${CLICKHOUSE_CLIENT} --query "CREATE DATABASE ${CLICKHOUSE_DATABASE}_db engine = # Non-replicated engines are allowed ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test (id UInt64) ENGINE = MergeTree() ORDER BY id AS SELECT 1" -${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv (id UInt64) ENGINE = MergeTree() ORDER BY id POPULATE AS SELECT 1" +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv (id UInt64) ENGINE = MergeTree() ORDER BY id POPULATE AS SELECT 1 AS id" # Replicated storafes are forbidden ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" -${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id POPULATE AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id POPULATE AS SELECT 1 AS id" |& grep -cm1 "SUPPORT_IS_DISABLED" # POPULATE is allowed with the special setting -${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id POPULATE AS SELECT 1" --database_replicated_allow_heavy_create=1 -${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv3 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id POPULATE AS SELECT 1" --compatibility='24.6' +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id POPULATE AS SELECT 1 AS id" --database_replicated_allow_heavy_create=1 +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv3 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id POPULATE AS SELECT 1 AS id" --compatibility='24.6' # AS SELECT is forbidden even with the setting ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id AS SELECT 1" --database_replicated_allow_heavy_create=1 |& grep -cm1 "SUPPORT_IS_DISABLED" diff --git a/tests/queries/0_stateless/02935_format_with_arbitrary_types.reference b/tests/queries/0_stateless/02935_format_with_arbitrary_types.reference index 3455adc8723..f100e8c48d4 100644 --- a/tests/queries/0_stateless/02935_format_with_arbitrary_types.reference +++ b/tests/queries/0_stateless/02935_format_with_arbitrary_types.reference @@ -34,6 +34,7 @@ The answer to all questions is 2023-11-14 05:50:12.123. The answer to all questions is hallo. The answer to all questions is [\'foo\',\'bar\']. The answer to all questions is {"foo":"bar"}. +The answer to all questions is {"foo":"bar"}. The answer to all questions is (42,\'foo\'). The answer to all questions is {42:\'foo\'}. The answer to all questions is 122.233.64.201. diff --git a/tests/queries/0_stateless/02935_format_with_arbitrary_types.sql b/tests/queries/0_stateless/02935_format_with_arbitrary_types.sql index ad1de2bec6d..dcc3964e4b0 100644 --- a/tests/queries/0_stateless/02935_format_with_arbitrary_types.sql +++ b/tests/queries/0_stateless/02935_format_with_arbitrary_types.sql @@ -3,6 +3,7 @@ -- no-fasttest: json type needs rapidjson library, geo types need s2 geometry SET allow_experimental_object_type = 1; +SET allow_experimental_json_type = 1; SET allow_suspicious_low_cardinality_types=1; SELECT '-- Const string + non-const arbitrary type'; @@ -40,6 +41,7 @@ SELECT format('The {0} to all questions is {1}.', 'answer', materialize('2023-11 SELECT format('The {0} to all questions is {1}.', 'answer', materialize('2023-11-14 05:50:12.123' :: DateTime64(3, 'Europe/Amsterdam'))); SELECT format('The {0} to all questions is {1}.', 'answer', materialize('hallo' :: Enum('hallo' = 1))); SELECT format('The {0} to all questions is {1}.', 'answer', materialize(['foo', 'bar'] :: Array(String))); +SELECT format('The {0} to all questions is {1}.', 'answer', materialize('{"foo": "bar"}' :: Object('json'))); SELECT format('The {0} to all questions is {1}.', 'answer', materialize('{"foo": "bar"}' :: JSON)); SELECT format('The {0} to all questions is {1}.', 'answer', materialize((42, 'foo') :: Tuple(Int32, String))); SELECT format('The {0} to all questions is {1}.', 'answer', materialize(map(42, 'foo') :: Map(Int32, String))); diff --git a/tests/queries/0_stateless/02940_system_stacktrace_optimizations.sh b/tests/queries/0_stateless/02940_system_stacktrace_optimizations.sh index 0e23bb6c42b..9d1faf301d3 100755 --- a/tests/queries/0_stateless/02940_system_stacktrace_optimizations.sh +++ b/tests/queries/0_stateless/02940_system_stacktrace_optimizations.sh @@ -9,12 +9,12 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # no message at all echo "thread = 0" -$CLICKHOUSE_CLIENT --allow_repeated_settings --send_logs_level=test -nm -q "select * from system.stack_trace where thread_id = 0" |& grep -F -o 'Send signal to' +$CLICKHOUSE_CLIENT --allow_repeated_settings --send_logs_level=test -m -q "select * from system.stack_trace where thread_id = 0" |& grep -F -o 'Send signal to' # send messages to some threads echo "thread != 0" -$CLICKHOUSE_CLIENT --allow_repeated_settings --send_logs_level=test -nm -q "select * from system.stack_trace where thread_id != 0 format Null" |& grep -F -o 'Send signal to' | grep -v 'Send signal to 0 threads (total)' +$CLICKHOUSE_CLIENT --allow_repeated_settings --send_logs_level=test -m -q "select * from system.stack_trace where thread_id != 0 format Null" |& grep -F -o 'Send signal to' | grep -v 'Send signal to 0 threads (total)' # there is no thread with comm="foo", so no signals will be sent echo "thread_name = 'foo'" -$CLICKHOUSE_CLIENT --allow_repeated_settings --send_logs_level=test -nm -q "select * from system.stack_trace where thread_name = 'foo' format Null" |& grep -F -o 'Send signal to 0 threads (total)' +$CLICKHOUSE_CLIENT --allow_repeated_settings --send_logs_level=test -m -q "select * from system.stack_trace where thread_name = 'foo' format Null" |& grep -F -o 'Send signal to 0 threads (total)' diff --git a/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.sh b/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.sh index 27950866e81..44af2dbf26f 100755 --- a/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.sh +++ b/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.sh @@ -26,7 +26,7 @@ function wait_part() function restore_failpoints() { # restore entry error with failpoints (to avoid endless errors in logs) - $CLICKHOUSE_CLIENT -nm -q " + $CLICKHOUSE_CLIENT -m -q " system enable failpoint replicated_queue_unfail_entries; system sync replica $failed_replica; system disable failpoint replicated_queue_unfail_entries; @@ -34,7 +34,7 @@ function restore_failpoints() } trap restore_failpoints EXIT -$CLICKHOUSE_CLIENT -nm --insert_keeper_fault_injection_probability=0 -q " +$CLICKHOUSE_CLIENT -m --insert_keeper_fault_injection_probability=0 -q " drop table if exists data_r1; drop table if exists data_r2; @@ -45,7 +45,7 @@ $CLICKHOUSE_CLIENT -nm --insert_keeper_fault_injection_probability=0 -q " " # will fail ALTER_METADATA on one of replicas -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " system enable failpoint replicated_queue_fail_next_entry; alter table data_r1 drop index value_idx settings alter_sync=0; -- part all_0_0_0_1 @@ -80,7 +80,7 @@ fi # This will create MERGE_PARTS, on failed replica it will be fetched from source replica (since it does not have all parts to execute merge) $CLICKHOUSE_CLIENT -q "optimize table $success_replica final settings optimize_throw_if_noop=1, alter_sync=1" # part all_0_0_1_1 -$CLICKHOUSE_CLIENT -nm --insert_keeper_fault_injection_probability=0 -q " +$CLICKHOUSE_CLIENT -m --insert_keeper_fault_injection_probability=0 -q " insert into $success_replica (key) values (2); -- part all_2_2_0 -- Avoid 'Cannot select parts for optimization: Entry for part all_2_2_0 hasn't been read from the replication log yet' system sync replica $success_replica pull; diff --git a/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.reference b/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.reference index 298cc908178..c6bbcdc20c2 100644 --- a/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.reference +++ b/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.reference @@ -1,20 +1,20 @@ -100 10 10 10 0 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 +100 10 10 1 10 0 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 0 10 98 set max_size from 100 to 10 -10 10 10 10 0 0 8 1 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 +10 10 10 1 10 0 0 8 1 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 1 8 set max_size from 10 to 100 -100 10 10 10 0 0 8 1 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 +100 10 10 1 10 0 0 8 1 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 10 98 set max_elements from 10 to 2 -100 2 10 10 0 0 18 2 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 +100 2 10 1 10 0 0 18 2 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 2 18 set max_elements from 2 to 10 -100 10 10 10 0 0 18 2 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 +100 10 10 1 10 0 0 18 2 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 10 98 diff --git a/tests/queries/0_stateless/02947_merge_tree_index_table_3.sh b/tests/queries/0_stateless/02947_merge_tree_index_table_3.sh index 6cb184cb1fe..ec699d974d4 100755 --- a/tests/queries/0_stateless/02947_merge_tree_index_table_3.sh +++ b/tests/queries/0_stateless/02947_merge_tree_index_table_3.sh @@ -6,7 +6,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) user_name="${CLICKHOUSE_DATABASE}_test_user_02947" -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " DROP TABLE IF EXISTS t_merge_tree_index; DROP USER IF EXISTS $user_name; @@ -44,7 +44,7 @@ $CLICKHOUSE_CLIENT --user "$user_name" --password "password" -q "SELECT arr.size $CLICKHOUSE_CLIENT --user "$user_name" --password "password" -q "SELECT b FROM mergeTreeIndex(currentDatabase(), t_merge_tree_index, with_marks = true)" 2>&1 | grep -m1 -o "ACCESS_DENIED" || echo "OK" $CLICKHOUSE_CLIENT --user "$user_name" --password "password" -q "SELECT b.mark FROM mergeTreeIndex(currentDatabase(), t_merge_tree_index, with_marks = true)" 2>&1 | grep -m1 -o "ACCESS_DENIED" || echo "OK" -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " DROP TABLE IF EXISTS t_merge_tree_index; DROP USER IF EXISTS $user_name; " diff --git a/tests/queries/0_stateless/02950_dictionary_ssd_cache_short_circuit.sh b/tests/queries/0_stateless/02950_dictionary_ssd_cache_short_circuit.sh index daa9c571a5d..d06aba8a4b6 100755 --- a/tests/queries/0_stateless/02950_dictionary_ssd_cache_short_circuit.sh +++ b/tests/queries/0_stateless/02950_dictionary_ssd_cache_short_circuit.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" CREATE TABLE source_table ( id UInt64, diff --git a/tests/queries/0_stateless/02950_distributed_initial_query_event.sh b/tests/queries/0_stateless/02950_distributed_initial_query_event.sh index 7f690a681c4..737d5c6b41e 100755 --- a/tests/queries/0_stateless/02950_distributed_initial_query_event.sh +++ b/tests/queries/0_stateless/02950_distributed_initial_query_event.sh @@ -6,7 +6,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh # CREATE TABLE local (x UInt8) Engine=Memory; # CREATE TABLE distributed ON CLUSTER cluster (p Date, i Int32) ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), x) -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " DROP TABLE IF EXISTS local; DROP TABLE IF EXISTS distributed; CREATE TABLE local (x UInt8) Engine=Memory; diff --git a/tests/queries/0_stateless/02952_conjunction_optimization.reference b/tests/queries/0_stateless/02952_conjunction_optimization.reference index eeadfaae21d..8af0abefd3a 100644 --- a/tests/queries/0_stateless/02952_conjunction_optimization.reference +++ b/tests/queries/0_stateless/02952_conjunction_optimization.reference @@ -32,7 +32,7 @@ QUERY id: 0 FUNCTION id: 5, function_name: and, function_type: ordinary, result_type: Bool ARGUMENTS LIST id: 6, nodes: 2 - CONSTANT id: 7, constant_value: UInt64_1, constant_value_type: Bool + CONSTANT id: 7, constant_value: Bool_1, constant_value_type: Bool FUNCTION id: 8, function_name: notIn, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 9, nodes: 2 diff --git a/tests/queries/0_stateless/02963_test_flexible_disk_configuration.sql b/tests/queries/0_stateless/02963_test_flexible_disk_configuration.sql index 7ebef866360..6b285d423e7 100644 --- a/tests/queries/0_stateless/02963_test_flexible_disk_configuration.sql +++ b/tests/queries/0_stateless/02963_test_flexible_disk_configuration.sql @@ -2,13 +2,33 @@ drop table if exists test; create table test (a Int32) engine = MergeTree() order by tuple() -settings disk=disk(name='test1', type = object_storage, object_storage_type = local_blob_storage, path='./02963_test1/'); +settings disk=disk(name='02963_custom_disk', type = object_storage, object_storage_type = local_blob_storage, path='./02963_test1/'); -drop table test; +drop table if exists test; +create table test (a Int32) engine = MergeTree() order by tuple() +settings disk=disk(name='02963_custom_disk', type = object_storage, object_storage_type = local_blob_storage, path='./02963_test2/'); -- { serverError BAD_ARGUMENTS } + +drop table if exists test; +create table test (a Int32) engine = MergeTree() order by tuple() +settings disk=disk(name='02963_custom_disk'); -- { serverError BAD_ARGUMENTS } + +drop table if exists test; +create table test (a Int32) engine = MergeTree() order by tuple() +settings disk='02963_custom_disk'; -- { serverError BAD_ARGUMENTS } + +drop table if exists test; +create table test (a Int32) engine = MergeTree() order by tuple() +settings disk=disk(name='s3_disk_02963'); -- { serverError BAD_ARGUMENTS } + +drop table if exists test; create table test (a Int32) engine = MergeTree() order by tuple() settings disk='s3_disk_02963'; -drop table test; +drop table if exists test; +create table test (a Int32) engine = MergeTree() order by tuple() +settings disk=disk(name='s3_disk_02963', type = object_storage, object_storage_type = local_blob_storage, path='./02963_test2/'); -- { serverError BAD_ARGUMENTS } + +drop table if exists test; create table test (a Int32) engine = MergeTree() order by tuple() settings disk=disk(name='test1', type = object_storage, @@ -17,7 +37,7 @@ settings disk=disk(name='test1', access_key_id = clickhouse, secret_access_key = clickhouse); -drop table test; +drop table if exists test; create table test (a Int32) engine = MergeTree() order by tuple() settings disk=disk(name='test2', type = object_storage, @@ -27,7 +47,7 @@ settings disk=disk(name='test2', access_key_id = clickhouse, secret_access_key = clickhouse); -drop table test; +drop table if exists test; create table test (a Int32) engine = MergeTree() order by tuple() settings disk=disk(name='test3', type = object_storage, @@ -37,8 +57,8 @@ settings disk=disk(name='test3', endpoint = 'http://localhost:11111/test/common/', access_key_id = clickhouse, secret_access_key = clickhouse); -drop table test; +drop table if exists test; create table test (a Int32) engine = MergeTree() order by tuple() settings disk=disk(name='test4', type = object_storage, @@ -48,8 +68,8 @@ settings disk=disk(name='test4', endpoint = 'http://localhost:11111/test/common/', access_key_id = clickhouse, secret_access_key = clickhouse); -drop table test; +drop table if exists test; create table test (a Int32) engine = MergeTree() order by tuple() settings disk=disk(name='test5', type = object_storage, diff --git a/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_3.sh b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_3.sh index e954cb0e78e..c4b44ce11c5 100755 --- a/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_3.sh +++ b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_3.sh @@ -30,7 +30,7 @@ SETTINGS enable_analyzer=1, allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=0" -$CLICKHOUSE_CLIENT -q " +$CLICKHOUSE_CLIENT --max_rows_in_set_to_optimize_join 0 -q " select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=0) r @@ -55,7 +55,7 @@ SETTINGS enable_analyzer=1, allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=0" -$CLICKHOUSE_CLIENT -q " +$CLICKHOUSE_CLIENT --max_rows_in_set_to_optimize_join 0 -q " select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=1) r @@ -81,7 +81,7 @@ SETTINGS enable_analyzer=1, parallel_replicas_prefer_local_join=0, allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', join_algorithm='full_sorting_merge'" -$CLICKHOUSE_CLIENT -q " +$CLICKHOUSE_CLIENT --max_rows_in_set_to_optimize_join 0 -q " select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings join_algorithm='full_sorting_merge') r @@ -106,7 +106,7 @@ SETTINGS enable_analyzer=1, parallel_replicas_prefer_local_join=0, allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', join_algorithm='full_sorting_merge'" -$CLICKHOUSE_CLIENT -q " +$CLICKHOUSE_CLIENT --max_rows_in_set_to_optimize_join 0 -q " select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings join_algorithm='hash') r @@ -131,7 +131,7 @@ SETTINGS enable_analyzer=1, parallel_replicas_prefer_local_join=0, allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', join_algorithm='hash'" -$CLICKHOUSE_CLIENT -q " +$CLICKHOUSE_CLIENT --max_rows_in_set_to_optimize_join 0 -q " select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings join_algorithm='full_sorting_merge') r diff --git a/tests/queries/0_stateless/02968_file_log_multiple_read.sh b/tests/queries/0_stateless/02968_file_log_multiple_read.sh index d9bae05270a..0879bf02d60 100755 --- a/tests/queries/0_stateless/02968_file_log_multiple_read.sh +++ b/tests/queries/0_stateless/02968_file_log_multiple_read.sh @@ -15,7 +15,7 @@ do echo $i >> ${logs_dir}/a.txt done -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" DROP TABLE IF EXISTS file_log; DROP TABLE IF EXISTS table_to_store_data; DROP TABLE IF EXISTS file_log_mv; @@ -69,7 +69,7 @@ done ${CLICKHOUSE_CLIENT} --query "SELECT * FROM table_to_store_data ORDER BY id;" -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" DROP TABLE file_log; DROP TABLE table_to_store_data; DROP TABLE file_log_mv; diff --git a/tests/queries/0_stateless/02969_mysql_cast_type_aliases.reference b/tests/queries/0_stateless/02969_mysql_cast_type_aliases.reference index 5555c918500..3e63763d544 100644 --- a/tests/queries/0_stateless/02969_mysql_cast_type_aliases.reference +++ b/tests/queries/0_stateless/02969_mysql_cast_type_aliases.reference @@ -7,7 +7,7 @@ Decimal 45 Decimal(10, 0) Decimal(M) 46 Decimal(4, 0) Decimal(M, D) 47.21 Decimal(4, 2) Double 48.11 Float64 -JSON {"foo":"bar"} Object(\'json\') +JSON {"foo":"bar"} JSON Real 49.22 Float32 Signed 50 Int64 Unsigned 52 UInt64 @@ -21,7 +21,7 @@ Decimal 45 Decimal(10, 0) Decimal(M) 46 Decimal(4, 0) Decimal(M, D) 47.21 Decimal(4, 2) Double 48.11 Float64 -JSON {"foo":"bar"} Object(\'json\') +JSON {"foo":"bar"} JSON Real 49.22 Float32 Signed 50 Int64 Unsigned 52 UInt64 diff --git a/tests/queries/0_stateless/02969_mysql_cast_type_aliases.sql b/tests/queries/0_stateless/02969_mysql_cast_type_aliases.sql index 7b5735cdebc..8cccde4b0ab 100644 --- a/tests/queries/0_stateless/02969_mysql_cast_type_aliases.sql +++ b/tests/queries/0_stateless/02969_mysql_cast_type_aliases.sql @@ -1,7 +1,7 @@ -- See https://dev.mysql.com/doc/refman/8.0/en/cast-functions.html#function_cast -- Tests are in order of the type appearance in the docs -SET allow_experimental_object_type = 1; +SET allow_experimental_json_type = 1; SELECT '-- Uppercase tests'; -- Not supported as it is translated to FixedString without arguments diff --git a/tests/queries/0_stateless/02974_backup_query_format_null.sh b/tests/queries/0_stateless/02974_backup_query_format_null.sh index ddba2f6de16..345a4f47b20 100755 --- a/tests/queries/0_stateless/02974_backup_query_format_null.sh +++ b/tests/queries/0_stateless/02974_backup_query_format_null.sh @@ -4,7 +4,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " DROP TABLE IF EXISTS tbl; CREATE TABLE tbl (a Int32) ENGINE = MergeTree() ORDER BY tuple(); INSERT INTO tbl VALUES (2), (80), (-12345); @@ -14,7 +14,7 @@ backup_name="Disk('backups', '${CLICKHOUSE_TEST_UNIQUE_NAME}')" ${CLICKHOUSE_CLIENT} --query "BACKUP TABLE tbl TO ${backup_name} FORMAT Null" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " DROP TABLE tbl; RESTORE ALL FROM ${backup_name} FORMAT Null " diff --git a/tests/queries/0_stateless/02981_insert_select_resize_to_max_insert_threads.sh b/tests/queries/0_stateless/02981_insert_select_resize_to_max_insert_threads.sh index e65c9654c9c..7ad5a2179f9 100755 --- a/tests/queries/0_stateless/02981_insert_select_resize_to_max_insert_threads.sh +++ b/tests/queries/0_stateless/02981_insert_select_resize_to_max_insert_threads.sh @@ -6,7 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -nq """ +${CLICKHOUSE_CLIENT} -q """ CREATE TABLE t1_local ( n UInt64, diff --git a/tests/queries/0_stateless/02995_index_10.sh b/tests/queries/0_stateless/02995_index_10.sh index 813cc49cbd8..e7e7d3c3b42 100755 --- a/tests/queries/0_stateless/02995_index_10.sh +++ b/tests/queries/0_stateless/02995_index_10.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} " +${CLICKHOUSE_CLIENT} -q " DROP TABLE IF EXISTS test; CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11; @@ -37,8 +37,9 @@ WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1 AND b <= (b1 + round(pow(sipHash64(7, try), 1 / (3 + sipHash64(8, try) % 8))))::String AND c >= (round(pow(sipHash64(9, try), 1 / (3 + sipHash64(10, try) % 8))) AS c1)::String AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String -HAVING count() > 0; -" +HAVING count() > 0 +SETTINGS trace_profile_events=0 -- test is too slow with profiling +;" done | ${CLICKHOUSE_CLIENT} -${CLICKHOUSE_CLIENT} "DROP TABLE test" +${CLICKHOUSE_CLIENT} -q "DROP TABLE test" diff --git a/tests/queries/0_stateless/02995_index_7.sh b/tests/queries/0_stateless/02995_index_7.sh index a5fdd98b2f8..7a03b0d4c1a 100755 --- a/tests/queries/0_stateless/02995_index_7.sh +++ b/tests/queries/0_stateless/02995_index_7.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage +# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage, no-distributed-cache CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.sql b/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.sql index 8ccc3cf61da..6714a069246 100644 --- a/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.sql +++ b/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.sql @@ -1,3 +1,5 @@ +-- Tags: long +SET max_rows_to_read = 0; create table test (number UInt64) engine=MergeTree order by number; insert into test select * from numbers(50000000); select ignore(number) from test where RAND() > 4292390314 limit 10; diff --git a/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.sh b/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.sh index dbaf7bf20c4..b3aba430003 100755 --- a/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.sh +++ b/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.sh @@ -12,12 +12,12 @@ set -e function wait_until() { local q=$1 && shift - while [ "$($CLICKHOUSE_CLIENT -nm -q "$q")" != "1" ]; do + while [ "$($CLICKHOUSE_CLIENT -m -q "$q")" != "1" ]; do sleep 0.5 done } -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " drop table if exists rmt_master; drop table if exists rmt_slave; @@ -33,7 +33,7 @@ $CLICKHOUSE_CLIENT -nm -q " optimize table rmt_master final settings alter_sync=1, optimize_throw_if_noop=1; " -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " system flush logs; select 'before'; select table, event_type, error>0, countIf(error=0) from system.part_log where database = currentDatabase() group by 1, 2, 3 order by 1, 2, 3; @@ -42,7 +42,7 @@ $CLICKHOUSE_CLIENT -nm -q " " # wait until rmt_slave will fetch the part and reflect this error in system.part_log wait_until "select count()>0 from system.part_log where table = 'rmt_slave' and database = '$CLICKHOUSE_DATABASE' and error > 0" -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " system sync replica rmt_slave; system flush logs; diff --git a/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.sh b/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.sh index a6a02946602..c67db053401 100755 --- a/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.sh +++ b/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.sh @@ -15,12 +15,12 @@ set -e function wait_until() { local q=$1 && shift - while [ "$($CLICKHOUSE_CLIENT -nm -q "$q")" != "1" ]; do + while [ "$($CLICKHOUSE_CLIENT -m -q "$q")" != "1" ]; do sleep 0.5 done } -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " drop table if exists rmt_master; drop table if exists rmt_slave; @@ -41,17 +41,17 @@ $CLICKHOUSE_CLIENT -nm -q " # the part, and rmt_slave will consider it instead of performing mutation on # it's own, otherwise prefer_fetch_merged_part_*_threshold will be simply ignored wait_for_mutation rmt_master 0000000000 -$CLICKHOUSE_CLIENT -nm -q "system start pulling replication log rmt_slave" +$CLICKHOUSE_CLIENT -m -q "system start pulling replication log rmt_slave" # and wait until rmt_slave to fetch the part and reflect this error in system.part_log wait_until "select count()>0 from system.part_log where table = 'rmt_slave' and database = '$CLICKHOUSE_DATABASE' and error > 0" -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " select 'before'; select table, event_type, error>0, countIf(error=0) from system.part_log where database = currentDatabase() group by 1, 2, 3 order by 1, 2, 3; system start replicated sends rmt_master; " wait_for_mutation rmt_slave 0000000000 -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " system sync replica rmt_slave; system flush logs; diff --git a/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.reference b/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.reference index 46f24f73356..a70e70ef7e9 100644 --- a/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.reference +++ b/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.reference @@ -382,6 +382,253 @@ key1 e 5 5 5 key1 C 3 4 5 key2 a2 1 1 1 0 0 \N key4 f 2 3 4 key4 F 1 1 1 SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 LEFT ANY JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 c 3 2 1 key1 C 3 4 5 +key1 d 4 7 2 key1 D 4 1 6 +key1 e 5 5 5 0 0 \N +key2 a2 1 1 1 0 0 \N +key4 f 2 3 4 key4 F 1 1 1 +SELECT t1.*, t2.* from t1 LEFT ANY JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 c 3 2 1 key1 B 2 1 2 +key1 d 4 7 2 key1 D 4 1 6 +key1 e 5 5 5 0 0 \N +key2 a2 1 1 1 0 0 \N +key4 f 2 3 4 0 0 \N +SELECT t1.*, t2.* from t1 LEFT ANY JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 c 3 2 1 key1 D 4 1 6 +key1 d 4 7 2 0 0 \N +key1 e 5 5 5 0 0 \N +key2 a2 1 1 1 0 0 \N +key4 f 2 3 4 0 0 \N +SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 LEFT ANY JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); +1 1 1 1 1 1 +SELECT t1.*, t2.* FROM t1 LEFT SEMI JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 c 3 2 1 key1 C 3 4 5 +key1 d 4 7 2 key1 D 4 1 6 +key4 f 2 3 4 key4 F 1 1 1 +SELECT t1.*, t2.* from t1 LEFT SEMI JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 c 3 2 1 key1 B 2 1 2 +key1 d 4 7 2 key1 D 4 1 6 +SELECT t1.*, t2.* from t1 LEFT SEMI JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 c 3 2 1 key1 D 4 1 6 +SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 LEFT SEMI JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); +1 1 1 1 1 1 +SELECT t1.*, t2.* FROM t1 LEFT ANTI JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 e 5 5 5 key1 0 0 \N +key2 a2 1 1 1 key2 0 0 \N +SELECT t1.*, t2.* from t1 LEFT ANTI JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 e 5 5 5 key1 0 0 \N +key2 a2 1 1 1 key2 0 0 \N +key4 f 2 3 4 key4 0 0 \N +SELECT t1.*, t2.* from t1 LEFT ANTI JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 d 4 7 2 key1 0 0 \N +key1 e 5 5 5 key1 0 0 \N +key2 a2 1 1 1 key2 0 0 \N +key4 f 2 3 4 key4 0 0 \N +SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 LEFT ANTI JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); +SELECT t1.*, t2.* FROM t1 RIGHT ANY JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); + 0 0 \N key3 a3 1 1 1 +key1 a 1 1 2 key1 A 1 2 1 +key1 a 1 1 2 key1 B 2 1 2 +key1 a 1 1 2 key1 C 3 4 5 +key1 a 1 1 2 key1 D 4 1 6 +key4 f 2 3 4 key4 F 1 1 1 +SELECT t1.*, t2.* from t1 RIGHT ANY JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); + 0 0 \N key3 a3 1 1 1 + 0 0 \N key4 F 1 1 1 +key1 a 1 1 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 d 4 7 2 key1 D 4 1 6 +SELECT t1.*, t2.* from t1 RIGHT ANY JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); + 0 0 \N key1 A 1 2 1 + 0 0 \N key3 a3 1 1 1 + 0 0 \N key4 F 1 1 1 +key1 a 1 1 2 key1 B 2 1 2 +key1 a 1 1 2 key1 C 3 4 5 +key1 a 1 1 2 key1 D 4 1 6 +SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 RIGHT ANY JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); +1 1 1 1 1 1 +SELECT t1.*, t2.* FROM t1 RIGHT SEMI JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 A 1 2 1 +key1 a 1 1 2 key1 B 2 1 2 +key1 a 1 1 2 key1 C 3 4 5 +key1 a 1 1 2 key1 D 4 1 6 +key4 f 2 3 4 key4 F 1 1 1 +SELECT t1.*, t2.* from t1 RIGHT SEMI JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 d 4 7 2 key1 D 4 1 6 +SELECT t1.*, t2.* from t1 RIGHT SEMI JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 B 2 1 2 +key1 a 1 1 2 key1 C 3 4 5 +key1 a 1 1 2 key1 D 4 1 6 +SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 RIGHT SEMI JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); +1 1 1 1 1 1 +SELECT t1.*, t2.* FROM t1 RIGHT ANTI JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); + 0 0 \N key3 a3 1 1 1 +SELECT t1.*, t2.* from t1 RIGHT ANTI JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); + 0 0 \N key3 a3 1 1 1 + 0 0 \N key4 F 1 1 1 +SELECT t1.*, t2.* from t1 RIGHT ANTI JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); + 0 0 \N key1 A 1 2 1 + 0 0 \N key3 a3 1 1 1 + 0 0 \N key4 F 1 1 1 +SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 RIGHT ANTI JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 LEFT ANY JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 c 3 2 1 key1 C 3 4 5 +key1 d 4 7 2 key1 D 4 1 6 +key1 e 5 5 5 0 0 \N +key2 a2 1 1 1 0 0 \N +key4 f 2 3 4 key4 F 1 1 1 +SELECT t1.*, t2.* from t1 LEFT ANY JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 c 3 2 1 key1 B 2 1 2 +key1 d 4 7 2 key1 D 4 1 6 +key1 e 5 5 5 0 0 \N +key2 a2 1 1 1 0 0 \N +key4 f 2 3 4 0 0 \N +SELECT t1.*, t2.* from t1 LEFT ANY JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 c 3 2 1 key1 D 4 1 6 +key1 d 4 7 2 0 0 \N +key1 e 5 5 5 0 0 \N +key2 a2 1 1 1 0 0 \N +key4 f 2 3 4 0 0 \N +SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 LEFT ANY JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); +1 1 1 1 1 1 +SELECT t1.*, t2.* FROM t1 LEFT SEMI JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 c 3 2 1 key1 C 3 4 5 +key1 d 4 7 2 key1 D 4 1 6 +key4 f 2 3 4 key4 F 1 1 1 +SELECT t1.*, t2.* from t1 LEFT SEMI JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 c 3 2 1 key1 B 2 1 2 +key1 d 4 7 2 key1 D 4 1 6 +SELECT t1.*, t2.* from t1 LEFT SEMI JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 c 3 2 1 key1 D 4 1 6 +SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 LEFT SEMI JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); +1 1 1 1 1 1 +SELECT t1.*, t2.* FROM t1 LEFT ANTI JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 e 5 5 5 key1 0 0 \N +key2 a2 1 1 1 key2 0 0 \N +SELECT t1.*, t2.* from t1 LEFT ANTI JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 e 5 5 5 key1 0 0 \N +key2 a2 1 1 1 key2 0 0 \N +key4 f 2 3 4 key4 0 0 \N +SELECT t1.*, t2.* from t1 LEFT ANTI JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 d 4 7 2 key1 0 0 \N +key1 e 5 5 5 key1 0 0 \N +key2 a2 1 1 1 key2 0 0 \N +key4 f 2 3 4 key4 0 0 \N +SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 LEFT ANTI JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); +SELECT t1.*, t2.* FROM t1 RIGHT ANY JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); + 0 0 \N key3 a3 1 1 1 +key1 a 1 1 2 key1 A 1 2 1 +key1 a 1 1 2 key1 B 2 1 2 +key1 a 1 1 2 key1 C 3 4 5 +key1 a 1 1 2 key1 D 4 1 6 +key4 f 2 3 4 key4 F 1 1 1 +SELECT t1.*, t2.* from t1 RIGHT ANY JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); + 0 0 \N key3 a3 1 1 1 + 0 0 \N key4 F 1 1 1 +key1 a 1 1 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 d 4 7 2 key1 D 4 1 6 +SELECT t1.*, t2.* from t1 RIGHT ANY JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); + 0 0 \N key1 A 1 2 1 + 0 0 \N key3 a3 1 1 1 + 0 0 \N key4 F 1 1 1 +key1 a 1 1 2 key1 B 2 1 2 +key1 a 1 1 2 key1 C 3 4 5 +key1 a 1 1 2 key1 D 4 1 6 +SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 RIGHT ANY JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); +1 1 1 1 1 1 +SELECT t1.*, t2.* FROM t1 RIGHT SEMI JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 A 1 2 1 +key1 a 1 1 2 key1 B 2 1 2 +key1 a 1 1 2 key1 C 3 4 5 +key1 a 1 1 2 key1 D 4 1 6 +key4 f 2 3 4 key4 F 1 1 1 +SELECT t1.*, t2.* from t1 RIGHT SEMI JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 d 4 7 2 key1 D 4 1 6 +SELECT t1.*, t2.* from t1 RIGHT SEMI JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 B 2 1 2 +key1 a 1 1 2 key1 C 3 4 5 +key1 a 1 1 2 key1 D 4 1 6 +SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 RIGHT SEMI JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); +1 1 1 1 1 1 +SELECT t1.*, t2.* FROM t1 RIGHT ANTI JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); + 0 0 \N key3 a3 1 1 1 +SELECT t1.*, t2.* from t1 RIGHT ANTI JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); + 0 0 \N key3 a3 1 1 1 + 0 0 \N key4 F 1 1 1 +SELECT t1.*, t2.* from t1 RIGHT ANTI JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); + 0 0 \N key1 A 1 2 1 + 0 0 \N key3 a3 1 1 1 + 0 0 \N key4 F 1 1 1 +SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 RIGHT ANTI JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); +SET join_algorithm='hash'; +SELECT t1.* FROM t1 LEFT ANY JOIN t2 ON t1.key = t2.key AND t1.a < t2.a OR t1.a = t2.a ORDER BY ALL; +key1 a 1 1 2 +key1 b 2 3 2 +key1 c 3 2 1 +key1 d 4 7 2 +key1 e 5 5 5 +key2 a2 1 1 1 +key4 f 2 3 4 +SELECT t1.* FROM t1 LEFT SEMI JOIN t2 ON t1.key = t2.key AND t1.a < t2.a OR t1.a = t2.a ORDER BY ALL; +key1 a 1 1 2 +key1 b 2 3 2 +key1 c 3 2 1 +key1 d 4 7 2 +key2 a2 1 1 1 +key4 f 2 3 4 +SELECT t1.* FROM t1 LEFT ANTI JOIN t2 ON t1.key = t2.key AND t1.a < t2.a OR t1.a = t2.a ORDER BY ALL; +key1 e 5 5 5 +SELECT t1.* FROM t1 RIGHT ANY JOIN t2 ON t1.key = t2.key AND t1.a < t2.a OR t1.a = t2.a ORDER BY ALL; +key1 a 1 1 2 +key1 a 1 1 2 +key1 a 1 1 2 +key1 a 1 1 2 +key1 a 1 1 2 +key1 a 1 1 2 +SELECT t1.* FROM t1 RIGHT SEMI JOIN t2 ON t1.key = t2.key AND t1.a < t2.a OR t1.a = t2.a ORDER BY ALL; +key1 a 1 1 2 +key1 a 1 1 2 +key1 a 1 1 2 +key1 a 1 1 2 +key1 a 1 1 2 +key1 a 1 1 2 +SELECT t1.* FROM t1 RIGHT ANTI JOIN t2 ON t1.key = t2.key AND t1.a < t2.a OR t1.a = t2.a ORDER BY ALL; +SET join_algorithm='hash'; SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.key = t2.key AND t1.a < t2.a OR t1.a = t2.a ORDER BY (t1.key, t1.attr, t2.key, t2.attr); key1 a 1 1 2 key1 A 1 2 1 key1 a 1 1 2 key1 B 2 1 2 @@ -452,3 +699,46 @@ key2 a2 1 1 1 key1 A 1 2 1 key2 a2 1 1 1 key3 a3 1 1 1 key2 a2 1 1 1 key4 F 1 1 1 key4 f 2 3 4 key1 B 2 1 2 +SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 INNER ANY JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 c 3 2 1 key1 C 3 4 5 +key1 d 4 7 2 key1 D 4 1 6 +key4 f 2 3 4 key4 F 1 1 1 +SELECT t1.*, t2.* from t1 INNER ANY JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 c 3 2 1 key1 B 2 1 2 +key1 d 4 7 2 key1 D 4 1 6 +SELECT t1.*, t2.* from t1 INNER ANY JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 c 3 2 1 key1 D 4 1 6 +SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 INNER ANY JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); +1 1 1 1 1 1 +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 INNER ANY JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 c 3 2 1 key1 C 3 4 5 +key1 d 4 7 2 key1 D 4 1 6 +key4 f 2 3 4 key4 F 1 1 1 +SELECT t1.*, t2.* from t1 INNER ANY JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 c 3 2 1 key1 B 2 1 2 +key1 d 4 7 2 key1 D 4 1 6 +SELECT t1.*, t2.* from t1 INNER ANY JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 c 3 2 1 key1 D 4 1 6 +SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 INNER ANY JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); +1 1 1 1 1 1 +SET join_algorithm='hash'; +SELECT t1.* FROM t1 INNER ANY JOIN t2 ON t1.key = t2.key AND t1.a < t2.a OR t1.a = t2.a ORDER BY ALL; +key1 a 1 1 2 +key1 b 2 3 2 +key1 c 3 2 1 +key1 d 4 7 2 +key2 a2 1 1 1 diff --git a/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.sql.j2 b/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.sql.j2 index 61ad5ec0bf1..b300881c562 100644 --- a/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.sql.j2 +++ b/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.sql.j2 @@ -22,6 +22,26 @@ SELECT t1.*, t2.* FROM t1 {{ join_type }} JOIN t2 ON t1.key = t2.key AND (t1.a=2 {% endfor -%} {% endfor -%} +{% for algorithm in ['hash', 'grace_hash'] -%} +SET join_algorithm='{{ algorithm }}'; +{% for join_type in ['LEFT', 'RIGHT'] -%} +{% for join_strictness in ['ANY', 'SEMI', 'ANTI'] -%} +SELECT t1.*, t2.* FROM t1 {{ join_type }} {{ join_strictness }} JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +SELECT t1.*, t2.* from t1 {{ join_type }} {{ join_strictness }} JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +SELECT t1.*, t2.* from t1 {{ join_type }} {{ join_strictness }} JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 {{ join_type }} {{ join_strictness }} JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); +{% endfor -%} +{% endfor -%} +{% endfor -%} + +{% for algorithm in ['hash'] -%} +SET join_algorithm='{{ algorithm }}'; +{% for join_type in ['LEFT', 'RIGHT'] -%} +{% for join_strictness in ['ANY', 'SEMI', 'ANTI'] -%} +SELECT t1.* FROM t1 {{ join_type }} {{ join_strictness }} JOIN t2 ON t1.key = t2.key AND t1.a < t2.a OR t1.a = t2.a ORDER BY ALL; +{% endfor -%} +{% endfor -%} +{% endfor -%} {% for algorithm in ['hash'] -%} SET join_algorithm='{{ algorithm }}'; @@ -29,6 +49,28 @@ SET join_algorithm='{{ algorithm }}'; SELECT t1.*, t2.* FROM t1 {{ join_type }} JOIN t2 ON t1.key = t2.key AND t1.a < t2.a OR t1.a = t2.a ORDER BY (t1.key, t1.attr, t2.key, t2.attr); {% endfor -%} {% endfor -%} + +{% for algorithm in ['hash', 'grace_hash'] -%} +SET join_algorithm='{{ algorithm }}'; +{% for join_type in ['INNER'] -%} +{% for join_strictness in ['ANY'] -%} +SELECT t1.*, t2.* FROM t1 {{ join_type }} {{ join_strictness }} JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +SELECT t1.*, t2.* from t1 {{ join_type }} {{ join_strictness }} JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +SELECT t1.*, t2.* from t1 {{ join_type }} {{ join_strictness }} JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 {{ join_type }} {{ join_strictness }} JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); +{% endfor -%} +{% endfor -%} +{% endfor -%} + +{% for algorithm in ['hash'] -%} +SET join_algorithm='{{ algorithm }}'; +{% for join_type in ['INNER'] -%} +{% for join_strictness in ['ANY'] -%} +SELECT t1.* FROM t1 {{ join_type }} {{ join_strictness }} JOIN t2 ON t1.key = t2.key AND t1.a < t2.a OR t1.a = t2.a ORDER BY ALL; +{% endfor -%} +{% endfor -%} +{% endfor -%} + -- { echoOff } -- test error messages diff --git a/tests/queries/0_stateless/03008_deduplication_random_setttings.sh b/tests/queries/0_stateless/03008_deduplication_random_setttings.sh index e9f59138177..07b99eb4e99 100755 --- a/tests/queries/0_stateless/03008_deduplication_random_setttings.sh +++ b/tests/queries/0_stateless/03008_deduplication_random_setttings.sh @@ -35,7 +35,7 @@ THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" -$CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " +$CLICKHOUSE_CLIENT --max_insert_block_size 1 -mq " $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ --insert-method $insert_method \ --table-engine $engine \ @@ -48,7 +48,7 @@ $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " ) " 1>/dev/null 2>&1 && echo 'insert_several_blocks_into_table OK' || echo "FAIL: insert_several_blocks_into_table ${THIS_RUN}" -$CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " +$CLICKHOUSE_CLIENT --max_insert_block_size 1 -mq " $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \ --insert-method $insert_method \ --table-engine $engine \ @@ -61,7 +61,7 @@ $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " ) " 1>/dev/null 2>&1 && echo 'mv_generates_several_blocks OK' || echo "FAIL: mv_generates_several_blocks ${THIS_RUN}" -$CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " +$CLICKHOUSE_CLIENT --max_insert_block_size 1 -mq " $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ --insert-method $insert_method \ --table-engine $engine \ diff --git a/tests/queries/0_stateless/03008_local_plain_rewritable.sh b/tests/queries/0_stateless/03008_local_plain_rewritable.sh index d51e180efc9..e61f9061297 100755 --- a/tests/queries/0_stateless/03008_local_plain_rewritable.sh +++ b/tests/queries/0_stateless/03008_local_plain_rewritable.sh @@ -8,7 +8,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ${CLICKHOUSE_CLIENT} --query "drop table if exists 03008_test_local_mt sync" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " create table 03008_test_local_mt (a Int32, b Int64, c Int64) engine = MergeTree() partition by intDiv(a, 1000) order by tuple(a, b) settings disk = disk( @@ -19,35 +19,35 @@ settings disk = disk( path = '/var/lib/clickhouse/disks/local_plain_rewritable/') " -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " insert into 03008_test_local_mt (*) values (1, 2, 0), (2, 2, 2), (3, 1, 9), (4, 7, 7), (5, 10, 2), (6, 12, 5); insert into 03008_test_local_mt (*) select number, number, number from numbers_mt(10000); " -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " select count(*) from 03008_test_local_mt; select (*) from 03008_test_local_mt order by tuple(a, b) limit 10; " ${CLICKHOUSE_CLIENT} --query "optimize table 03008_test_local_mt final;" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " alter table 03008_test_local_mt modify setting disk = '03008_local_plain_rewritable', old_parts_lifetime = 3600; select engine_full from system.tables WHERE database = currentDatabase() AND name = '03008_test_local_mt'; " | grep -c "old_parts_lifetime = 3600" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " select count(*) from 03008_test_local_mt; select (*) from 03008_test_local_mt order by tuple(a, b) limit 10; " -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " alter table 03008_test_local_mt update c = 0 where a % 2 = 1; alter table 03008_test_local_mt add column d Int64 after c; alter table 03008_test_local_mt drop column c; " 2>&1 | grep -Fq "SUPPORT_IS_DISABLED" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " truncate table 03008_test_local_mt; select count(*) from 03008_test_local_mt; " diff --git a/tests/queries/0_stateless/03008_s3_plain_rewritable.sh b/tests/queries/0_stateless/03008_s3_plain_rewritable.sh index 4d5989f6f12..8eea7940774 100755 --- a/tests/queries/0_stateless/03008_s3_plain_rewritable.sh +++ b/tests/queries/0_stateless/03008_s3_plain_rewritable.sh @@ -46,7 +46,12 @@ ${CLICKHOUSE_CLIENT} --query "drop table if exists test_s3_mt_dst" ${CLICKHOUSE_CLIENT} -m --query " create table test_s3_mt_dst (a Int32, b Int64, c Int64) engine = MergeTree() partition by intDiv(a, 1000) order by tuple(a, b) -settings disk = '03008_s3_plain_rewritable' +settings disk = disk( + name = 03008_s3_plain_rewritable, + type = s3_plain_rewritable, + endpoint = 'http://localhost:11111/test/03008_test_s3_mt/', + access_key_id = clickhouse, + secret_access_key = clickhouse); " ${CLICKHOUSE_CLIENT} -m --query " diff --git a/tests/queries/0_stateless/03015_peder1001.sql b/tests/queries/0_stateless/03015_peder1001.sql index 810503207f2..df8e4db1536 100644 --- a/tests/queries/0_stateless/03015_peder1001.sql +++ b/tests/queries/0_stateless/03015_peder1001.sql @@ -1,3 +1,6 @@ +-- Tags: no-fasttest +-- no-fasttest: upper/lowerUTF8 use ICU + DROP TABLE IF EXISTS test_data; CREATE TABLE test_data diff --git a/tests/queries/0_stateless/03031_clickhouse_local_input.sh b/tests/queries/0_stateless/03031_clickhouse_local_input.sh index 6f59e9b9703..e2f9cf48108 100755 --- a/tests/queries/0_stateless/03031_clickhouse_local_input.sh +++ b/tests/queries/0_stateless/03031_clickhouse_local_input.sh @@ -6,15 +6,15 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) tmp_file="$CUR_DIR/$CLICKHOUSE_DATABASE.txt" echo '# foo' -$CLICKHOUSE_LOCAL --engine_file_truncate_on_insert=1 -n -q "insert into function file('$tmp_file', 'LineAsString', 'x String') select * from input('x String') format LineAsString" << d2 -[1,2,3] [1,2,3] 0 Array(Int64) Array(Int64) -[1,2,3] [1,2,4] 0 Array(Int64) Array(Int64) -[1,2,3] 42 0 Array(Int64) Int64 -[1,2,3] abc 0 Array(Int64) String -[1,2,3] \N 0 Array(Int64) None -[1,2,4] [1,2,3] 1 Array(Int64) Array(Int64) -42 [1,2,3] 1 Int64 Array(Int64) -42 42 0 Int64 Int64 -42 43 0 Int64 Int64 -42 abc 0 Int64 String -42 \N 0 Int64 None -43 42 1 Int64 Int64 -abc [1,2,3] 1 String Array(Int64) -abc 42 1 String Int64 -abc abc 0 String String -abc abd 0 String String -abc \N 0 String None -abd abc 1 String String -\N [1,2,3] 1 None Array(Int64) -\N 42 1 None Int64 -\N abc 1 None String -\N \N 0 None None +[1,2,3] [1,2,3] 0 Array(Int64) true Array(Int64) true +[1,2,3] [1,2,4] 0 Array(Int64) true Array(Int64) true +[1,2,3] 2020-01-01 0 Array(Int64) true Date true +[1,2,3] 42 0 Array(Int64) true Int64 false +[1,2,3] abc 0 Array(Int64) true String false +[1,2,3] \N 0 Array(Int64) true None false +[1,2,4] [1,2,3] 1 Array(Int64) true Array(Int64) true +2020-01-01 [1,2,3] 1 Date true Array(Int64) true +2020-01-01 2020-01-01 0 Date true Date true +2020-01-01 2020-01-02 0 Date true Date true +2020-01-01 42 0 Date true Int64 false +2020-01-01 abc 0 Date true String false +2020-01-01 \N 0 Date true None false +2020-01-02 2020-01-01 1 Date true Date true +42 [1,2,3] 1 Int64 false Array(Int64) true +42 2020-01-01 1 Int64 false Date true +42 42 0 Int64 false Int64 false +42 43 0 Int64 false Int64 false +42 abc 0 Int64 false String false +42 \N 0 Int64 false None false +43 42 1 Int64 false Int64 false +abc [1,2,3] 1 String false Array(Int64) true +abc 2020-01-01 1 String false Date true +abc 42 1 String false Int64 false +abc abc 0 String false String false +abc abd 0 String false String false +abc \N 0 String false None false +abd abc 1 String false String false +\N [1,2,3] 1 None false Array(Int64) true +\N 2020-01-01 1 None false Date true +\N 42 1 None false Int64 false +\N abc 1 None false String false +\N \N 0 None false None false d1 >= d2 -[1,2,3] [1,2,3] 1 Array(Int64) Array(Int64) -[1,2,3] [1,2,4] 1 Array(Int64) Array(Int64) -[1,2,3] 42 1 Array(Int64) Int64 -[1,2,3] abc 1 Array(Int64) String -[1,2,3] \N 1 Array(Int64) None -[1,2,4] [1,2,3] 1 Array(Int64) Array(Int64) -42 [1,2,3] 1 Int64 Array(Int64) -42 42 1 Int64 Int64 -42 43 1 Int64 Int64 -42 abc 1 Int64 String -42 \N 1 Int64 None -43 42 1 Int64 Int64 -abc [1,2,3] 1 String Array(Int64) -abc 42 1 String Int64 -abc abc 1 String String -abc abd 1 String String -abc \N 1 String None -abd abc 1 String String -\N [1,2,3] 1 None Array(Int64) -\N 42 1 None Int64 -\N abc 1 None String -\N \N 1 None None +[1,2,3] [1,2,3] 1 Array(Int64) true Array(Int64) true +[1,2,3] [1,2,4] 1 Array(Int64) true Array(Int64) true +[1,2,3] 2020-01-01 1 Array(Int64) true Date true +[1,2,3] 42 1 Array(Int64) true Int64 false +[1,2,3] abc 1 Array(Int64) true String false +[1,2,3] \N 1 Array(Int64) true None false +[1,2,4] [1,2,3] 1 Array(Int64) true Array(Int64) true +2020-01-01 [1,2,3] 1 Date true Array(Int64) true +2020-01-01 2020-01-01 1 Date true Date true +2020-01-01 2020-01-02 1 Date true Date true +2020-01-01 42 1 Date true Int64 false +2020-01-01 abc 1 Date true String false +2020-01-01 \N 1 Date true None false +2020-01-02 2020-01-01 1 Date true Date true +42 [1,2,3] 1 Int64 false Array(Int64) true +42 2020-01-01 1 Int64 false Date true +42 42 1 Int64 false Int64 false +42 43 1 Int64 false Int64 false +42 abc 1 Int64 false String false +42 \N 1 Int64 false None false +43 42 1 Int64 false Int64 false +abc [1,2,3] 1 String false Array(Int64) true +abc 2020-01-01 1 String false Date true +abc 42 1 String false Int64 false +abc abc 1 String false String false +abc abd 1 String false String false +abc \N 1 String false None false +abd abc 1 String false String false +\N [1,2,3] 1 None false Array(Int64) true +\N 2020-01-01 1 None false Date true +\N 42 1 None false Int64 false +\N abc 1 None false String false +\N \N 1 None false None false diff --git a/tests/queries/0_stateless/03035_dynamic_sorting.sql b/tests/queries/0_stateless/03035_dynamic_sorting.sql index 0487fafc955..e0039a348c6 100644 --- a/tests/queries/0_stateless/03035_dynamic_sorting.sql +++ b/tests/queries/0_stateless/03035_dynamic_sorting.sql @@ -1,80 +1,55 @@ set allow_experimental_dynamic_type = 1; drop table if exists test; -create table test (d1 Dynamic, d2 Dynamic) engine=Memory; - -insert into test values (42, 42); -insert into test values (42, 43); -insert into test values (43, 42); - -insert into test values ('abc', 'abc'); -insert into test values ('abc', 'abd'); -insert into test values ('abd', 'abc'); - -insert into test values ([1,2,3], [1,2,3]); -insert into test values ([1,2,3], [1,2,4]); -insert into test values ([1,2,4], [1,2,3]); - -insert into test values (NULL, NULL); - -insert into test values (42, 'abc'); -insert into test values ('abc', 42); - -insert into test values (42, [1,2,3]); -insert into test values ([1,2,3], 42); - -insert into test values (42, NULL); -insert into test values (NULL, 42); - -insert into test values ('abc', [1,2,3]); -insert into test values ([1,2,3], 'abc'); - -insert into test values ('abc', NULL); -insert into test values (NULL, 'abc'); - -insert into test values ([1,2,3], NULL); -insert into test values (NULL, [1,2,3]); +create table test (d1 Dynamic(max_types=2), d2 Dynamic(max_types=2)) engine=Memory; +insert into test values (42, 42), (42, 43), (43, 42), ('abc', 'abc'), ('abc', 'abd'), ('abd', 'abc'), +([1,2,3], [1,2,3]), ([1,2,3], [1,2,4]), ([1,2,4], [1,2,3]), +('2020-01-01', '2020-01-01'), ('2020-01-01', '2020-01-02'), ('2020-01-02', '2020-01-01'), +(NULL, NULL), (42, 'abc'), ('abc', 42), (42, [1,2,3]), ([1,2,3], 42), (42, NULL), (NULL, 42), +('abc', [1,2,3]), ([1,2,3], 'abc'), ('abc', NULL), (NULL, 'abc'), ([1,2,3], NULL), (NULL, [1,2,3]), +(42, '2020-01-01'), ('2020-01-01', 42), ('2020-01-01', 'abc'), ('abc', '2020-01-01'), +('2020-01-01', [1,2,3]), ([1,2,3], '2020-01-01'), ('2020-01-01', NULL), (NULL, '2020-01-01'); select 'order by d1 nulls first'; -select d1, dynamicType(d1) from test order by d1 nulls first; +select d1, dynamicType(d1), isDynamicElementInSharedData(d1) from test order by d1 nulls first; select 'order by d1 nulls last'; -select d1, dynamicType(d1) from test order by d1 nulls last; +select d1, dynamicType(d1), isDynamicElementInSharedData(d1) from test order by d1 nulls last; select 'order by d2 nulls first'; -select d2, dynamicType(d2) from test order by d2 nulls first; +select d2, dynamicType(d2), isDynamicElementInSharedData(d2) from test order by d2 nulls first; select 'order by d2 nulls last'; -select d2, dynamicType(d2) from test order by d2 nulls last; +select d2, dynamicType(d2), isDynamicElementInSharedData(d2) from test order by d2 nulls last; select 'order by d1, d2 nulls first'; -select d1, d2, dynamicType(d1), dynamicType(d2) from test order by d1, d2 nulls first; +select d1, d2, dynamicType(d1), isDynamicElementInSharedData(d1), dynamicType(d2), isDynamicElementInSharedData(d2) from test order by d1, d2 nulls first; select 'order by d1, d2 nulls last'; -select d1, d2, dynamicType(d1), dynamicType(d2) from test order by d1, d2 nulls last; +select d1, d2, dynamicType(d1), isDynamicElementInSharedData(d1), dynamicType(d2), isDynamicElementInSharedData(d2) from test order by d1, d2 nulls last; select 'order by d2, d1 nulls first'; -select d1, d2, dynamicType(d1), dynamicType(d2) from test order by d2, d1 nulls first; +select d1, d2, dynamicType(d1), isDynamicElementInSharedData(d1), dynamicType(d2), isDynamicElementInSharedData(d2) from test order by d2, d1 nulls first; select 'order by d2, d1 nulls last'; -select d1, d2, dynamicType(d1), dynamicType(d2) from test order by d2, d1 nulls last; +select d1, d2, dynamicType(d1), isDynamicElementInSharedData(d1), dynamicType(d2), isDynamicElementInSharedData(d2) from test order by d2, d1 nulls last; select 'd1 = d2'; -select d1, d2, d1 = d2, dynamicType(d1), dynamicType(d2) from test order by d1, d2; +select d1, d2, d1 = d2, dynamicType(d1), isDynamicElementInSharedData(d1), dynamicType(d2), isDynamicElementInSharedData(d2) from test order by d1, d2; select 'd1 < d2'; -select d1, d2, d1 < d2, dynamicType(d1), dynamicType(d2) from test order by d1, d2; +select d1, d2, d1 < d2, dynamicType(d1), isDynamicElementInSharedData(d1), dynamicType(d2), isDynamicElementInSharedData(d2) from test order by d1, d2; select 'd1 <= d2'; -select d1, d2, d1 <= d2, dynamicType(d1), dynamicType(d2) from test order by d1, d2; +select d1, d2, d1 <= d2, dynamicType(d1), isDynamicElementInSharedData(d1), dynamicType(d2), isDynamicElementInSharedData(d2) from test order by d1, d2; select 'd1 > d2'; -select d1, d2, d1 > d2, dynamicType(d1), dynamicType(d2) from test order by d1, d2; +select d1, d2, d1 > d2, dynamicType(d1), isDynamicElementInSharedData(d1), dynamicType(d2), isDynamicElementInSharedData(d2) from test order by d1, d2; select 'd1 >= d2'; -select d1, d2, d2 >= d2, dynamicType(d1), dynamicType(d2) from test order by d1, d2; +select d1, d2, d2 >= d2, dynamicType(d1), isDynamicElementInSharedData(d1), dynamicType(d2), isDynamicElementInSharedData(d2) from test order by d1, d2; drop table test; diff --git a/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_compact_merge_tree.reference b/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_compact_merge_tree.reference new file mode 100644 index 00000000000..ca6c5dbba82 --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_compact_merge_tree.reference @@ -0,0 +1,20 @@ +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +LowCardinality(String) +None +String +UInt64 +360000 +360000 +200000 +200000 +0 +0 +20000 +20000 +200000 +200000 +20000 +20000 +200000 +0 diff --git a/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_compact_merge_tree.sql b/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_compact_merge_tree.sql new file mode 100644 index 00000000000..bff28fb5c90 --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_compact_merge_tree.sql @@ -0,0 +1,43 @@ +-- Tags: long, no-tsan, no-msan, no-ubsan, no-asan + +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; +set allow_experimental_dynamic_type = 1; + +drop table if exists test; +create table test (id UInt64, d Dynamic(max_types=2)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000; + +insert into test select number, number from numbers(100000) settings min_insert_block_size_rows=50000; +insert into test select number, 'str_' || toString(number) from numbers(100000, 100000) settings min_insert_block_size_rows=50000; +insert into test select number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1)) from numbers(200000, 100000) settings min_insert_block_size_rows=50000; +insert into test select number, NULL from numbers(300000, 100000) settings min_insert_block_size_rows=50000; +insert into test select number, multiIf(number % 4 == 3, 'str_' || toString(number), number % 4 == 2, NULL, number % 4 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1))) from numbers(400000, 400000) settings min_insert_block_size_rows=50000; +insert into test select number, if (number % 5 == 1, [range((number % 10 + 1)::UInt64)]::Array(Array(Dynamic)), number) from numbers(100000, 100000) settings min_insert_block_size_rows=50000; +insert into test select number, if (number % 5 == 1, ('str_' || number)::LowCardinality(String)::Dynamic, number::Dynamic) from numbers(100000, 100000) settings min_insert_block_size_rows=50000; + +select distinct dynamicType(d) as type from test order by type; +select count() from test where dynamicType(d) == 'UInt64'; +select count() from test where d.UInt64 is not NULL; +select count() from test where dynamicType(d) == 'String'; +select count() from test where d.String is not NULL; +select count() from test where dynamicType(d) == 'Date'; +select count() from test where d.Date is not NULL; +select count() from test where dynamicType(d) == 'LowCardinality(String)'; +select count() from test where d.`LowCardinality(String)` is not NULL; +select count() from test where dynamicType(d) == 'Array(Variant(String, UInt64))'; +select count() from test where not empty(d.`Array(Variant(String, UInt64))`); +select count() from test where dynamicType(d) == 'Array(Array(Dynamic))'; +select count() from test where not empty(d.`Array(Array(Dynamic))`); +select count() from test where d is NULL; +select count() from test where not empty(d.`Tuple(a Array(Dynamic))`.a.String); + +select d, d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test format Null; +select d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test format Null; +select d.Int8, d.Date, d.`LowCardinality(String)`, d.`Array(String)` from test format Null; +select d, d.UInt64, d.Date, d.`LowCardinality(String)`, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null; +select d.UInt64, d.Date, d.`LowCardinality(String)`, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64, d.`Array(Variant(String, UInt64))`.String from test format Null; +select d, d.`Tuple(a UInt64, b String)`.a, d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null; +select d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Dynamic)`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null; +select d.`Array(Array(Dynamic))`.size1, d.`Array(Array(Dynamic))`.UInt64, d.`Array(Array(Dynamic))`.`Map(String, Tuple(a UInt64))`.values.a from test format Null; + +drop table test; diff --git a/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_memory.reference b/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_memory.reference new file mode 100644 index 00000000000..ca6c5dbba82 --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_memory.reference @@ -0,0 +1,20 @@ +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +LowCardinality(String) +None +String +UInt64 +360000 +360000 +200000 +200000 +0 +0 +20000 +20000 +200000 +200000 +20000 +20000 +200000 +0 diff --git a/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_memory.sql b/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_memory.sql new file mode 100644 index 00000000000..4eed3d15529 --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_memory.sql @@ -0,0 +1,43 @@ +-- Tags: long, no-tsan, no-msan, no-ubsan, no-asan + +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; +set allow_experimental_dynamic_type = 1; + +drop table if exists test; +create table test (id UInt64, d Dynamic(max_types=2)) engine=Memory; + +insert into test select number, number from numbers(100000) settings min_insert_block_size_rows=50000; +insert into test select number, 'str_' || toString(number) from numbers(100000, 100000) settings min_insert_block_size_rows=50000; +insert into test select number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1)) from numbers(200000, 100000) settings min_insert_block_size_rows=50000; +insert into test select number, NULL from numbers(300000, 100000) settings min_insert_block_size_rows=50000; +insert into test select number, multiIf(number % 4 == 3, 'str_' || toString(number), number % 4 == 2, NULL, number % 4 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1))) from numbers(400000, 400000) settings min_insert_block_size_rows=50000; +insert into test select number, if (number % 5 == 1, [range((number % 10 + 1)::UInt64)]::Array(Array(Dynamic)), number) from numbers(100000, 100000) settings min_insert_block_size_rows=50000; +insert into test select number, if (number % 5 == 1, ('str_' || number)::LowCardinality(String)::Dynamic, number::Dynamic) from numbers(100000, 100000) settings min_insert_block_size_rows=50000; + +select distinct dynamicType(d) as type from test order by type; +select count() from test where dynamicType(d) == 'UInt64'; +select count() from test where d.UInt64 is not NULL; +select count() from test where dynamicType(d) == 'String'; +select count() from test where d.String is not NULL; +select count() from test where dynamicType(d) == 'Date'; +select count() from test where d.Date is not NULL; +select count() from test where dynamicType(d) == 'LowCardinality(String)'; +select count() from test where d.`LowCardinality(String)` is not NULL; +select count() from test where dynamicType(d) == 'Array(Variant(String, UInt64))'; +select count() from test where not empty(d.`Array(Variant(String, UInt64))`); +select count() from test where dynamicType(d) == 'Array(Array(Dynamic))'; +select count() from test where not empty(d.`Array(Array(Dynamic))`); +select count() from test where d is NULL; +select count() from test where not empty(d.`Tuple(a Array(Dynamic))`.a.String); + +select d, d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test format Null; +select d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test format Null; +select d.Int8, d.Date, d.`Array(String)` from test format Null; +select d, d.UInt64, d.Date, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null; +select d.UInt64, d.Date, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64, d.`Array(Variant(String, UInt64))`.String from test format Null; +select d, d.`Tuple(a UInt64, b String)`.a, d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null; +select d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Dynamic)`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null; +select d.`Array(Array(Dynamic))`.size1, d.`Array(Array(Dynamic))`.UInt64, d.`Array(Array(Dynamic))`.`Map(String, Tuple(a UInt64))`.values.a from test format Null; + +drop table test; diff --git a/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_small.reference.j2 b/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_small.reference.j2 new file mode 100644 index 00000000000..de12c6b8737 --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_small.reference.j2 @@ -0,0 +1,2463 @@ +Memory +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +LowCardinality(String) +None +String +UInt64 +36 +36 +20 +20 +0 +0 +2 +2 +20 +20 +2 +2 +20 +0 +0 0 \N [] +1 1 \N [] +2 2 \N [] +3 3 \N [] +4 4 \N [] +5 5 \N [] +6 6 \N [] +7 7 \N [] +8 8 \N [] +9 9 \N [] +str_10 \N str_10 [] +10 10 \N [] +10 10 \N [] +[[0,1]] \N \N [] +str_11 \N \N [] +str_11 \N str_11 [] +str_12 \N str_12 [] +12 12 \N [] +12 12 \N [] +str_13 \N str_13 [] +13 13 \N [] +13 13 \N [] +str_14 \N str_14 [] +14 14 \N [] +14 14 \N [] +str_15 \N str_15 [] +15 15 \N [] +15 15 \N [] +[[0,1,2,3,4,5,6]] \N \N [] +str_16 \N \N [] +str_16 \N str_16 [] +str_17 \N str_17 [] +17 17 \N [] +17 17 \N [] +str_18 \N str_18 [] +18 18 \N [] +18 18 \N [] +str_19 \N str_19 [] +19 19 \N [] +19 19 \N [] +[20] \N \N [20] +['str_21','str_21'] \N \N ['str_21','str_21'] +[22,22,22] \N \N [22,22,22] +[23,23,23,23] \N \N [23,23,23,23] +[24,24,24,24,24] \N \N [24,24,24,24,24] +[25,25,25,25,25,25] \N \N [25,25,25,25,25,25] +[26,26,26,26,26,26,26] \N \N [26,26,26,26,26,26,26] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] \N \N [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[28,28,28,28,28,28,28,28,28] \N \N [28,28,28,28,28,28,28,28,28] +[29,29,29,29,29,29,29,29,29,29] \N \N [29,29,29,29,29,29,29,29,29,29] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +[40] \N \N [40] +41 41 \N [] +\N \N \N [] +str_43 \N str_43 [] +[44,44,44,44,44] \N \N [44,44,44,44,44] +45 45 \N [] +\N \N \N [] +str_47 \N str_47 [] +['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] \N \N ['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] +49 49 \N [] +\N \N \N [] +str_51 \N str_51 [] +[52,52,52] \N \N [52,52,52] +53 53 \N [] +\N \N \N [] +str_55 \N str_55 [] +[56,56,56,56,56,56,56] \N \N [56,56,56,56,56,56,56] +57 57 \N [] +\N \N \N [] +str_59 \N str_59 [] +[60] \N \N [60] +61 61 \N [] +\N \N \N [] +str_63 \N str_63 [] +[64,64,64,64,64] \N \N [64,64,64,64,64] +65 65 \N [] +\N \N \N [] +str_67 \N str_67 [] +[68,68,68,68,68,68,68,68,68] \N \N [68,68,68,68,68,68,68,68,68] +69 69 \N [] +\N \N \N [] +str_71 \N str_71 [] +[NULL,NULL,NULL] \N \N [NULL,NULL,NULL] +73 73 \N [] +\N \N \N [] +str_75 \N str_75 [] +[76,76,76,76,76,76,76] \N \N [76,76,76,76,76,76,76] +77 77 \N [] +\N \N \N [] +str_79 \N str_79 [] +0 \N [] +1 \N [] +2 \N [] +3 \N [] +4 \N [] +5 \N [] +6 \N [] +7 \N [] +8 \N [] +9 \N [] +\N str_10 [] +10 \N [] +10 \N [] +\N \N [] +\N \N [] +\N str_11 [] +\N str_12 [] +12 \N [] +12 \N [] +\N str_13 [] +13 \N [] +13 \N [] +\N str_14 [] +14 \N [] +14 \N [] +\N str_15 [] +15 \N [] +15 \N [] +\N \N [] +\N \N [] +\N str_16 [] +\N str_17 [] +17 \N [] +17 \N [] +\N str_18 [] +18 \N [] +18 \N [] +\N str_19 [] +19 \N [] +19 \N [] +\N \N [20] +\N \N ['str_21','str_21'] +\N \N [22,22,22] +\N \N [23,23,23,23] +\N \N [24,24,24,24,24] +\N \N [25,25,25,25,25,25] +\N \N [26,26,26,26,26,26,26] +\N \N [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [28,28,28,28,28,28,28,28,28] +\N \N [29,29,29,29,29,29,29,29,29,29] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [40] +41 \N [] +\N \N [] +\N str_43 [] +\N \N [44,44,44,44,44] +45 \N [] +\N \N [] +\N str_47 [] +\N \N ['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] +49 \N [] +\N \N [] +\N str_51 [] +\N \N [52,52,52] +53 \N [] +\N \N [] +\N str_55 [] +\N \N [56,56,56,56,56,56,56] +57 \N [] +\N \N [] +\N str_59 [] +\N \N [60] +61 \N [] +\N \N [] +\N str_63 [] +\N \N [64,64,64,64,64] +65 \N [] +\N \N [] +\N str_67 [] +\N \N [68,68,68,68,68,68,68,68,68] +69 \N [] +\N \N [] +\N str_71 [] +\N \N [NULL,NULL,NULL] +73 \N [] +\N \N [] +\N str_75 [] +\N \N [76,76,76,76,76,76,76] +77 \N [] +\N \N [] +\N str_79 [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +0 0 \N [] 0 [] +1 1 \N [] 0 [] +2 2 \N [] 0 [] +3 3 \N [] 0 [] +4 4 \N [] 0 [] +5 5 \N [] 0 [] +6 6 \N [] 0 [] +7 7 \N [] 0 [] +8 8 \N [] 0 [] +9 9 \N [] 0 [] +str_10 \N \N [] 0 [] +10 10 \N [] 0 [] +10 10 \N [] 0 [] +[[0,1]] \N \N [] 0 [] +str_11 \N \N [] 0 [] +str_11 \N \N [] 0 [] +str_12 \N \N [] 0 [] +12 12 \N [] 0 [] +12 12 \N [] 0 [] +str_13 \N \N [] 0 [] +13 13 \N [] 0 [] +13 13 \N [] 0 [] +str_14 \N \N [] 0 [] +14 14 \N [] 0 [] +14 14 \N [] 0 [] +str_15 \N \N [] 0 [] +15 15 \N [] 0 [] +15 15 \N [] 0 [] +[[0,1,2,3,4,5,6]] \N \N [] 0 [] +str_16 \N \N [] 0 [] +str_16 \N \N [] 0 [] +str_17 \N \N [] 0 [] +17 17 \N [] 0 [] +17 17 \N [] 0 [] +str_18 \N \N [] 0 [] +18 18 \N [] 0 [] +18 18 \N [] 0 [] +str_19 \N \N [] 0 [] +19 19 \N [] 0 [] +19 19 \N [] 0 [] +[20] \N \N [20] 1 [20] +['str_21','str_21'] \N \N ['str_21','str_21'] 2 [NULL,NULL] +[22,22,22] \N \N [22,22,22] 3 [22,22,22] +[23,23,23,23] \N \N [23,23,23,23] 4 [23,23,23,23] +[24,24,24,24,24] \N \N [24,24,24,24,24] 5 [24,24,24,24,24] +[25,25,25,25,25,25] \N \N [25,25,25,25,25,25] 6 [25,25,25,25,25,25] +[26,26,26,26,26,26,26] \N \N [26,26,26,26,26,26,26] 7 [26,26,26,26,26,26,26] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] \N \N [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 8 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[28,28,28,28,28,28,28,28,28] \N \N [28,28,28,28,28,28,28,28,28] 9 [28,28,28,28,28,28,28,28,28] +[29,29,29,29,29,29,29,29,29,29] \N \N [29,29,29,29,29,29,29,29,29,29] 10 [29,29,29,29,29,29,29,29,29,29] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +[40] \N \N [40] 1 [40] +41 41 \N [] 0 [] +\N \N \N [] 0 [] +str_43 \N \N [] 0 [] +[44,44,44,44,44] \N \N [44,44,44,44,44] 5 [44,44,44,44,44] +45 45 \N [] 0 [] +\N \N \N [] 0 [] +str_47 \N \N [] 0 [] +['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] \N \N ['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] 9 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +49 49 \N [] 0 [] +\N \N \N [] 0 [] +str_51 \N \N [] 0 [] +[52,52,52] \N \N [52,52,52] 3 [52,52,52] +53 53 \N [] 0 [] +\N \N \N [] 0 [] +str_55 \N \N [] 0 [] +[56,56,56,56,56,56,56] \N \N [56,56,56,56,56,56,56] 7 [56,56,56,56,56,56,56] +57 57 \N [] 0 [] +\N \N \N [] 0 [] +str_59 \N \N [] 0 [] +[60] \N \N [60] 1 [60] +61 61 \N [] 0 [] +\N \N \N [] 0 [] +str_63 \N \N [] 0 [] +[64,64,64,64,64] \N \N [64,64,64,64,64] 5 [64,64,64,64,64] +65 65 \N [] 0 [] +\N \N \N [] 0 [] +str_67 \N \N [] 0 [] +[68,68,68,68,68,68,68,68,68] \N \N [68,68,68,68,68,68,68,68,68] 9 [68,68,68,68,68,68,68,68,68] +69 69 \N [] 0 [] +\N \N \N [] 0 [] +str_71 \N \N [] 0 [] +[NULL,NULL,NULL] \N \N [NULL,NULL,NULL] 3 [NULL,NULL,NULL] +73 73 \N [] 0 [] +\N \N \N [] 0 [] +str_75 \N \N [] 0 [] +[76,76,76,76,76,76,76] \N \N [76,76,76,76,76,76,76] 7 [76,76,76,76,76,76,76] +77 77 \N [] 0 [] +\N \N \N [] 0 [] +str_79 \N \N [] 0 [] +0 \N [] 0 [] [] +1 \N [] 0 [] [] +2 \N [] 0 [] [] +3 \N [] 0 [] [] +4 \N [] 0 [] [] +5 \N [] 0 [] [] +6 \N [] 0 [] [] +7 \N [] 0 [] [] +8 \N [] 0 [] [] +9 \N [] 0 [] [] +\N \N [] 0 [] [] +10 \N [] 0 [] [] +10 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +12 \N [] 0 [] [] +12 \N [] 0 [] [] +\N \N [] 0 [] [] +13 \N [] 0 [] [] +13 \N [] 0 [] [] +\N \N [] 0 [] [] +14 \N [] 0 [] [] +14 \N [] 0 [] [] +\N \N [] 0 [] [] +15 \N [] 0 [] [] +15 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +17 \N [] 0 [] [] +17 \N [] 0 [] [] +\N \N [] 0 [] [] +18 \N [] 0 [] [] +18 \N [] 0 [] [] +\N \N [] 0 [] [] +19 \N [] 0 [] [] +19 \N [] 0 [] [] +\N \N [20] 1 [20] [NULL] +\N \N ['str_21','str_21'] 2 [NULL,NULL] ['str_21','str_21'] +\N \N [22,22,22] 3 [22,22,22] [NULL,NULL,NULL] +\N \N [23,23,23,23] 4 [23,23,23,23] [NULL,NULL,NULL,NULL] +\N \N [24,24,24,24,24] 5 [24,24,24,24,24] [NULL,NULL,NULL,NULL,NULL] +\N \N [25,25,25,25,25,25] 6 [25,25,25,25,25,25] [NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [26,26,26,26,26,26,26] 7 [26,26,26,26,26,26,26] [NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 8 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [28,28,28,28,28,28,28,28,28] 9 [28,28,28,28,28,28,28,28,28] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [29,29,29,29,29,29,29,29,29,29] 10 [29,29,29,29,29,29,29,29,29,29] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [40] 1 [40] [NULL] +41 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [44,44,44,44,44] 5 [44,44,44,44,44] [NULL,NULL,NULL,NULL,NULL] +45 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N ['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] 9 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] ['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] +49 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [52,52,52] 3 [52,52,52] [NULL,NULL,NULL] +53 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [56,56,56,56,56,56,56] 7 [56,56,56,56,56,56,56] [NULL,NULL,NULL,NULL,NULL,NULL,NULL] +57 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [60] 1 [60] [NULL] +61 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [64,64,64,64,64] 5 [64,64,64,64,64] [NULL,NULL,NULL,NULL,NULL] +65 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [68,68,68,68,68,68,68,68,68] 9 [68,68,68,68,68,68,68,68,68] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +69 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [NULL,NULL,NULL] 3 [NULL,NULL,NULL] [NULL,NULL,NULL] +73 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [76,76,76,76,76,76,76] 7 [76,76,76,76,76,76,76] [NULL,NULL,NULL,NULL,NULL,NULL,NULL] +77 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +0 0 [] [] +1 0 [] [] +2 0 [] [] +3 0 [] [] +4 0 [] [] +5 0 [] [] +6 0 [] [] +7 0 [] [] +8 0 [] [] +9 0 [] [] +str_10 0 [] [] +10 0 [] [] +10 0 [] [] +[[0,1]] 0 [] [] +str_11 0 [] [] +str_11 0 [] [] +str_12 0 [] [] +12 0 [] [] +12 0 [] [] +str_13 0 [] [] +13 0 [] [] +13 0 [] [] +str_14 0 [] [] +14 0 [] [] +14 0 [] [] +str_15 0 [] [] +15 0 [] [] +15 0 [] [] +[[0,1,2,3,4,5,6]] 0 [] [] +str_16 0 [] [] +str_16 0 [] [] +str_17 0 [] [] +17 0 [] [] +17 0 [] [] +str_18 0 [] [] +18 0 [] [] +18 0 [] [] +str_19 0 [] [] +19 0 [] [] +19 0 [] [] +[20] 0 [] [20] +['str_21','str_21'] 0 [] [NULL,NULL] +[22,22,22] 0 [] [22,22,22] +[23,23,23,23] 0 [] [23,23,23,23] +[24,24,24,24,24] 0 [] [24,24,24,24,24] +[25,25,25,25,25,25] 0 [] [25,25,25,25,25,25] +[26,26,26,26,26,26,26] 0 [] [26,26,26,26,26,26,26] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 0 [] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[28,28,28,28,28,28,28,28,28] 0 [] [28,28,28,28,28,28,28,28,28] +[29,29,29,29,29,29,29,29,29,29] 0 [] [29,29,29,29,29,29,29,29,29,29] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +[40] 0 [] [40] +41 0 [] [] +\N 0 [] [] +str_43 0 [] [] +[44,44,44,44,44] 0 [] [44,44,44,44,44] +45 0 [] [] +\N 0 [] [] +str_47 0 [] [] +['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] 0 [] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +49 0 [] [] +\N 0 [] [] +str_51 0 [] [] +[52,52,52] 0 [] [52,52,52] +53 0 [] [] +\N 0 [] [] +str_55 0 [] [] +[56,56,56,56,56,56,56] 0 [] [56,56,56,56,56,56,56] +57 0 [] [] +\N 0 [] [] +str_59 0 [] [] +[60] 0 [] [60] +61 0 [] [] +\N 0 [] [] +str_63 0 [] [] +[64,64,64,64,64] 0 [] [64,64,64,64,64] +65 0 [] [] +\N 0 [] [] +str_67 0 [] [] +[68,68,68,68,68,68,68,68,68] 0 [] [68,68,68,68,68,68,68,68,68] +69 0 [] [] +\N 0 [] [] +str_71 0 [] [] +[NULL,NULL,NULL] 0 [] [NULL,NULL,NULL] +73 0 [] [] +\N 0 [] [] +str_75 0 [] [] +[76,76,76,76,76,76,76] 0 [] [76,76,76,76,76,76,76] +77 0 [] [] +\N 0 [] [] +str_79 0 [] [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [20] +[] 0 [NULL,NULL] +[] 0 [22,22,22] +[] 0 [23,23,23,23] +[] 0 [24,24,24,24,24] +[] 0 [25,25,25,25,25,25] +[] 0 [26,26,26,26,26,26,26] +[] 0 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[] 0 [28,28,28,28,28,28,28,28,28] +[] 0 [29,29,29,29,29,29,29,29,29,29] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [40] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [44,44,44,44,44] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [52,52,52] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [56,56,56,56,56,56,56] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [60] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [64,64,64,64,64] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [68,68,68,68,68,68,68,68,68] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [NULL,NULL,NULL] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [76,76,76,76,76,76,76] +[] 0 [] +[] 0 [] +[] 0 [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[2] [[0,1]] [[[],[]]] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[7] [[0,1,2,3,4,5,6]] [[[],[],[],[],[],[],[]]] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000 +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +LowCardinality(String) +None +String +UInt64 +36 +36 +20 +20 +0 +0 +2 +2 +20 +20 +2 +2 +20 +0 +0 0 \N [] +1 1 \N [] +2 2 \N [] +3 3 \N [] +4 4 \N [] +5 5 \N [] +6 6 \N [] +7 7 \N [] +8 8 \N [] +9 9 \N [] +str_10 \N str_10 [] +10 10 \N [] +10 10 \N [] +[[0,1]] \N \N [] +str_11 \N \N [] +str_11 \N str_11 [] +str_12 \N str_12 [] +12 12 \N [] +12 12 \N [] +str_13 \N str_13 [] +13 13 \N [] +13 13 \N [] +str_14 \N str_14 [] +14 14 \N [] +14 14 \N [] +str_15 \N str_15 [] +15 15 \N [] +15 15 \N [] +[[0,1,2,3,4,5,6]] \N \N [] +str_16 \N \N [] +str_16 \N str_16 [] +str_17 \N str_17 [] +17 17 \N [] +17 17 \N [] +str_18 \N str_18 [] +18 18 \N [] +18 18 \N [] +str_19 \N str_19 [] +19 19 \N [] +19 19 \N [] +[20] \N \N [20] +['str_21','str_21'] \N \N ['str_21','str_21'] +[22,22,22] \N \N [22,22,22] +[23,23,23,23] \N \N [23,23,23,23] +[24,24,24,24,24] \N \N [24,24,24,24,24] +[25,25,25,25,25,25] \N \N [25,25,25,25,25,25] +[26,26,26,26,26,26,26] \N \N [26,26,26,26,26,26,26] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] \N \N [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[28,28,28,28,28,28,28,28,28] \N \N [28,28,28,28,28,28,28,28,28] +[29,29,29,29,29,29,29,29,29,29] \N \N [29,29,29,29,29,29,29,29,29,29] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +[40] \N \N [40] +41 41 \N [] +\N \N \N [] +str_43 \N str_43 [] +[44,44,44,44,44] \N \N [44,44,44,44,44] +45 45 \N [] +\N \N \N [] +str_47 \N str_47 [] +['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] \N \N ['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] +49 49 \N [] +\N \N \N [] +str_51 \N str_51 [] +[52,52,52] \N \N [52,52,52] +53 53 \N [] +\N \N \N [] +str_55 \N str_55 [] +[56,56,56,56,56,56,56] \N \N [56,56,56,56,56,56,56] +57 57 \N [] +\N \N \N [] +str_59 \N str_59 [] +[60] \N \N [60] +61 61 \N [] +\N \N \N [] +str_63 \N str_63 [] +[64,64,64,64,64] \N \N [64,64,64,64,64] +65 65 \N [] +\N \N \N [] +str_67 \N str_67 [] +[68,68,68,68,68,68,68,68,68] \N \N [68,68,68,68,68,68,68,68,68] +69 69 \N [] +\N \N \N [] +str_71 \N str_71 [] +[NULL,NULL,NULL] \N \N [NULL,NULL,NULL] +73 73 \N [] +\N \N \N [] +str_75 \N str_75 [] +[76,76,76,76,76,76,76] \N \N [76,76,76,76,76,76,76] +77 77 \N [] +\N \N \N [] +str_79 \N str_79 [] +0 \N [] +1 \N [] +2 \N [] +3 \N [] +4 \N [] +5 \N [] +6 \N [] +7 \N [] +8 \N [] +9 \N [] +\N str_10 [] +10 \N [] +10 \N [] +\N \N [] +\N \N [] +\N str_11 [] +\N str_12 [] +12 \N [] +12 \N [] +\N str_13 [] +13 \N [] +13 \N [] +\N str_14 [] +14 \N [] +14 \N [] +\N str_15 [] +15 \N [] +15 \N [] +\N \N [] +\N \N [] +\N str_16 [] +\N str_17 [] +17 \N [] +17 \N [] +\N str_18 [] +18 \N [] +18 \N [] +\N str_19 [] +19 \N [] +19 \N [] +\N \N [20] +\N \N ['str_21','str_21'] +\N \N [22,22,22] +\N \N [23,23,23,23] +\N \N [24,24,24,24,24] +\N \N [25,25,25,25,25,25] +\N \N [26,26,26,26,26,26,26] +\N \N [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [28,28,28,28,28,28,28,28,28] +\N \N [29,29,29,29,29,29,29,29,29,29] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [40] +41 \N [] +\N \N [] +\N str_43 [] +\N \N [44,44,44,44,44] +45 \N [] +\N \N [] +\N str_47 [] +\N \N ['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] +49 \N [] +\N \N [] +\N str_51 [] +\N \N [52,52,52] +53 \N [] +\N \N [] +\N str_55 [] +\N \N [56,56,56,56,56,56,56] +57 \N [] +\N \N [] +\N str_59 [] +\N \N [60] +61 \N [] +\N \N [] +\N str_63 [] +\N \N [64,64,64,64,64] +65 \N [] +\N \N [] +\N str_67 [] +\N \N [68,68,68,68,68,68,68,68,68] +69 \N [] +\N \N [] +\N str_71 [] +\N \N [NULL,NULL,NULL] +73 \N [] +\N \N [] +\N str_75 [] +\N \N [76,76,76,76,76,76,76] +77 \N [] +\N \N [] +\N str_79 [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +0 0 \N [] 0 [] +1 1 \N [] 0 [] +2 2 \N [] 0 [] +3 3 \N [] 0 [] +4 4 \N [] 0 [] +5 5 \N [] 0 [] +6 6 \N [] 0 [] +7 7 \N [] 0 [] +8 8 \N [] 0 [] +9 9 \N [] 0 [] +str_10 \N \N [] 0 [] +10 10 \N [] 0 [] +10 10 \N [] 0 [] +[[0,1]] \N \N [] 0 [] +str_11 \N \N [] 0 [] +str_11 \N \N [] 0 [] +str_12 \N \N [] 0 [] +12 12 \N [] 0 [] +12 12 \N [] 0 [] +str_13 \N \N [] 0 [] +13 13 \N [] 0 [] +13 13 \N [] 0 [] +str_14 \N \N [] 0 [] +14 14 \N [] 0 [] +14 14 \N [] 0 [] +str_15 \N \N [] 0 [] +15 15 \N [] 0 [] +15 15 \N [] 0 [] +[[0,1,2,3,4,5,6]] \N \N [] 0 [] +str_16 \N \N [] 0 [] +str_16 \N \N [] 0 [] +str_17 \N \N [] 0 [] +17 17 \N [] 0 [] +17 17 \N [] 0 [] +str_18 \N \N [] 0 [] +18 18 \N [] 0 [] +18 18 \N [] 0 [] +str_19 \N \N [] 0 [] +19 19 \N [] 0 [] +19 19 \N [] 0 [] +[20] \N \N [20] 1 [20] +['str_21','str_21'] \N \N ['str_21','str_21'] 2 [NULL,NULL] +[22,22,22] \N \N [22,22,22] 3 [22,22,22] +[23,23,23,23] \N \N [23,23,23,23] 4 [23,23,23,23] +[24,24,24,24,24] \N \N [24,24,24,24,24] 5 [24,24,24,24,24] +[25,25,25,25,25,25] \N \N [25,25,25,25,25,25] 6 [25,25,25,25,25,25] +[26,26,26,26,26,26,26] \N \N [26,26,26,26,26,26,26] 7 [26,26,26,26,26,26,26] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] \N \N [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 8 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[28,28,28,28,28,28,28,28,28] \N \N [28,28,28,28,28,28,28,28,28] 9 [28,28,28,28,28,28,28,28,28] +[29,29,29,29,29,29,29,29,29,29] \N \N [29,29,29,29,29,29,29,29,29,29] 10 [29,29,29,29,29,29,29,29,29,29] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +[40] \N \N [40] 1 [40] +41 41 \N [] 0 [] +\N \N \N [] 0 [] +str_43 \N \N [] 0 [] +[44,44,44,44,44] \N \N [44,44,44,44,44] 5 [44,44,44,44,44] +45 45 \N [] 0 [] +\N \N \N [] 0 [] +str_47 \N \N [] 0 [] +['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] \N \N ['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] 9 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +49 49 \N [] 0 [] +\N \N \N [] 0 [] +str_51 \N \N [] 0 [] +[52,52,52] \N \N [52,52,52] 3 [52,52,52] +53 53 \N [] 0 [] +\N \N \N [] 0 [] +str_55 \N \N [] 0 [] +[56,56,56,56,56,56,56] \N \N [56,56,56,56,56,56,56] 7 [56,56,56,56,56,56,56] +57 57 \N [] 0 [] +\N \N \N [] 0 [] +str_59 \N \N [] 0 [] +[60] \N \N [60] 1 [60] +61 61 \N [] 0 [] +\N \N \N [] 0 [] +str_63 \N \N [] 0 [] +[64,64,64,64,64] \N \N [64,64,64,64,64] 5 [64,64,64,64,64] +65 65 \N [] 0 [] +\N \N \N [] 0 [] +str_67 \N \N [] 0 [] +[68,68,68,68,68,68,68,68,68] \N \N [68,68,68,68,68,68,68,68,68] 9 [68,68,68,68,68,68,68,68,68] +69 69 \N [] 0 [] +\N \N \N [] 0 [] +str_71 \N \N [] 0 [] +[NULL,NULL,NULL] \N \N [NULL,NULL,NULL] 3 [NULL,NULL,NULL] +73 73 \N [] 0 [] +\N \N \N [] 0 [] +str_75 \N \N [] 0 [] +[76,76,76,76,76,76,76] \N \N [76,76,76,76,76,76,76] 7 [76,76,76,76,76,76,76] +77 77 \N [] 0 [] +\N \N \N [] 0 [] +str_79 \N \N [] 0 [] +0 \N [] 0 [] [] +1 \N [] 0 [] [] +2 \N [] 0 [] [] +3 \N [] 0 [] [] +4 \N [] 0 [] [] +5 \N [] 0 [] [] +6 \N [] 0 [] [] +7 \N [] 0 [] [] +8 \N [] 0 [] [] +9 \N [] 0 [] [] +\N \N [] 0 [] [] +10 \N [] 0 [] [] +10 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +12 \N [] 0 [] [] +12 \N [] 0 [] [] +\N \N [] 0 [] [] +13 \N [] 0 [] [] +13 \N [] 0 [] [] +\N \N [] 0 [] [] +14 \N [] 0 [] [] +14 \N [] 0 [] [] +\N \N [] 0 [] [] +15 \N [] 0 [] [] +15 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +17 \N [] 0 [] [] +17 \N [] 0 [] [] +\N \N [] 0 [] [] +18 \N [] 0 [] [] +18 \N [] 0 [] [] +\N \N [] 0 [] [] +19 \N [] 0 [] [] +19 \N [] 0 [] [] +\N \N [20] 1 [20] [NULL] +\N \N ['str_21','str_21'] 2 [NULL,NULL] ['str_21','str_21'] +\N \N [22,22,22] 3 [22,22,22] [NULL,NULL,NULL] +\N \N [23,23,23,23] 4 [23,23,23,23] [NULL,NULL,NULL,NULL] +\N \N [24,24,24,24,24] 5 [24,24,24,24,24] [NULL,NULL,NULL,NULL,NULL] +\N \N [25,25,25,25,25,25] 6 [25,25,25,25,25,25] [NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [26,26,26,26,26,26,26] 7 [26,26,26,26,26,26,26] [NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 8 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [28,28,28,28,28,28,28,28,28] 9 [28,28,28,28,28,28,28,28,28] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [29,29,29,29,29,29,29,29,29,29] 10 [29,29,29,29,29,29,29,29,29,29] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [40] 1 [40] [NULL] +41 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [44,44,44,44,44] 5 [44,44,44,44,44] [NULL,NULL,NULL,NULL,NULL] +45 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N ['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] 9 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] ['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] +49 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [52,52,52] 3 [52,52,52] [NULL,NULL,NULL] +53 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [56,56,56,56,56,56,56] 7 [56,56,56,56,56,56,56] [NULL,NULL,NULL,NULL,NULL,NULL,NULL] +57 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [60] 1 [60] [NULL] +61 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [64,64,64,64,64] 5 [64,64,64,64,64] [NULL,NULL,NULL,NULL,NULL] +65 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [68,68,68,68,68,68,68,68,68] 9 [68,68,68,68,68,68,68,68,68] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +69 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [NULL,NULL,NULL] 3 [NULL,NULL,NULL] [NULL,NULL,NULL] +73 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [76,76,76,76,76,76,76] 7 [76,76,76,76,76,76,76] [NULL,NULL,NULL,NULL,NULL,NULL,NULL] +77 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +0 0 [] [] +1 0 [] [] +2 0 [] [] +3 0 [] [] +4 0 [] [] +5 0 [] [] +6 0 [] [] +7 0 [] [] +8 0 [] [] +9 0 [] [] +str_10 0 [] [] +10 0 [] [] +10 0 [] [] +[[0,1]] 0 [] [] +str_11 0 [] [] +str_11 0 [] [] +str_12 0 [] [] +12 0 [] [] +12 0 [] [] +str_13 0 [] [] +13 0 [] [] +13 0 [] [] +str_14 0 [] [] +14 0 [] [] +14 0 [] [] +str_15 0 [] [] +15 0 [] [] +15 0 [] [] +[[0,1,2,3,4,5,6]] 0 [] [] +str_16 0 [] [] +str_16 0 [] [] +str_17 0 [] [] +17 0 [] [] +17 0 [] [] +str_18 0 [] [] +18 0 [] [] +18 0 [] [] +str_19 0 [] [] +19 0 [] [] +19 0 [] [] +[20] 0 [] [20] +['str_21','str_21'] 0 [] [NULL,NULL] +[22,22,22] 0 [] [22,22,22] +[23,23,23,23] 0 [] [23,23,23,23] +[24,24,24,24,24] 0 [] [24,24,24,24,24] +[25,25,25,25,25,25] 0 [] [25,25,25,25,25,25] +[26,26,26,26,26,26,26] 0 [] [26,26,26,26,26,26,26] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 0 [] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[28,28,28,28,28,28,28,28,28] 0 [] [28,28,28,28,28,28,28,28,28] +[29,29,29,29,29,29,29,29,29,29] 0 [] [29,29,29,29,29,29,29,29,29,29] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +[40] 0 [] [40] +41 0 [] [] +\N 0 [] [] +str_43 0 [] [] +[44,44,44,44,44] 0 [] [44,44,44,44,44] +45 0 [] [] +\N 0 [] [] +str_47 0 [] [] +['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] 0 [] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +49 0 [] [] +\N 0 [] [] +str_51 0 [] [] +[52,52,52] 0 [] [52,52,52] +53 0 [] [] +\N 0 [] [] +str_55 0 [] [] +[56,56,56,56,56,56,56] 0 [] [56,56,56,56,56,56,56] +57 0 [] [] +\N 0 [] [] +str_59 0 [] [] +[60] 0 [] [60] +61 0 [] [] +\N 0 [] [] +str_63 0 [] [] +[64,64,64,64,64] 0 [] [64,64,64,64,64] +65 0 [] [] +\N 0 [] [] +str_67 0 [] [] +[68,68,68,68,68,68,68,68,68] 0 [] [68,68,68,68,68,68,68,68,68] +69 0 [] [] +\N 0 [] [] +str_71 0 [] [] +[NULL,NULL,NULL] 0 [] [NULL,NULL,NULL] +73 0 [] [] +\N 0 [] [] +str_75 0 [] [] +[76,76,76,76,76,76,76] 0 [] [76,76,76,76,76,76,76] +77 0 [] [] +\N 0 [] [] +str_79 0 [] [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [20] +[] 0 [NULL,NULL] +[] 0 [22,22,22] +[] 0 [23,23,23,23] +[] 0 [24,24,24,24,24] +[] 0 [25,25,25,25,25,25] +[] 0 [26,26,26,26,26,26,26] +[] 0 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[] 0 [28,28,28,28,28,28,28,28,28] +[] 0 [29,29,29,29,29,29,29,29,29,29] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [40] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [44,44,44,44,44] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [52,52,52] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [56,56,56,56,56,56,56] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [60] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [64,64,64,64,64] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [68,68,68,68,68,68,68,68,68] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [NULL,NULL,NULL] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [76,76,76,76,76,76,76] +[] 0 [] +[] 0 [] +[] 0 [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[2] [[0,1]] [[[],[]]] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[7] [[0,1,2,3,4,5,6]] [[[],[],[],[],[],[],[]]] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1 +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +LowCardinality(String) +None +String +UInt64 +36 +36 +20 +20 +0 +0 +2 +2 +20 +20 +2 +2 +20 +0 +0 0 \N [] +1 1 \N [] +2 2 \N [] +3 3 \N [] +4 4 \N [] +5 5 \N [] +6 6 \N [] +7 7 \N [] +8 8 \N [] +9 9 \N [] +str_10 \N str_10 [] +10 10 \N [] +10 10 \N [] +[[0,1]] \N \N [] +str_11 \N \N [] +str_11 \N str_11 [] +str_12 \N str_12 [] +12 12 \N [] +12 12 \N [] +str_13 \N str_13 [] +13 13 \N [] +13 13 \N [] +str_14 \N str_14 [] +14 14 \N [] +14 14 \N [] +str_15 \N str_15 [] +15 15 \N [] +15 15 \N [] +[[0,1,2,3,4,5,6]] \N \N [] +str_16 \N \N [] +str_16 \N str_16 [] +str_17 \N str_17 [] +17 17 \N [] +17 17 \N [] +str_18 \N str_18 [] +18 18 \N [] +18 18 \N [] +str_19 \N str_19 [] +19 19 \N [] +19 19 \N [] +[20] \N \N [20] +['str_21','str_21'] \N \N ['str_21','str_21'] +[22,22,22] \N \N [22,22,22] +[23,23,23,23] \N \N [23,23,23,23] +[24,24,24,24,24] \N \N [24,24,24,24,24] +[25,25,25,25,25,25] \N \N [25,25,25,25,25,25] +[26,26,26,26,26,26,26] \N \N [26,26,26,26,26,26,26] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] \N \N [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[28,28,28,28,28,28,28,28,28] \N \N [28,28,28,28,28,28,28,28,28] +[29,29,29,29,29,29,29,29,29,29] \N \N [29,29,29,29,29,29,29,29,29,29] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +[40] \N \N [40] +41 41 \N [] +\N \N \N [] +str_43 \N str_43 [] +[44,44,44,44,44] \N \N [44,44,44,44,44] +45 45 \N [] +\N \N \N [] +str_47 \N str_47 [] +['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] \N \N ['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] +49 49 \N [] +\N \N \N [] +str_51 \N str_51 [] +[52,52,52] \N \N [52,52,52] +53 53 \N [] +\N \N \N [] +str_55 \N str_55 [] +[56,56,56,56,56,56,56] \N \N [56,56,56,56,56,56,56] +57 57 \N [] +\N \N \N [] +str_59 \N str_59 [] +[60] \N \N [60] +61 61 \N [] +\N \N \N [] +str_63 \N str_63 [] +[64,64,64,64,64] \N \N [64,64,64,64,64] +65 65 \N [] +\N \N \N [] +str_67 \N str_67 [] +[68,68,68,68,68,68,68,68,68] \N \N [68,68,68,68,68,68,68,68,68] +69 69 \N [] +\N \N \N [] +str_71 \N str_71 [] +[NULL,NULL,NULL] \N \N [NULL,NULL,NULL] +73 73 \N [] +\N \N \N [] +str_75 \N str_75 [] +[76,76,76,76,76,76,76] \N \N [76,76,76,76,76,76,76] +77 77 \N [] +\N \N \N [] +str_79 \N str_79 [] +0 \N [] +1 \N [] +2 \N [] +3 \N [] +4 \N [] +5 \N [] +6 \N [] +7 \N [] +8 \N [] +9 \N [] +\N str_10 [] +10 \N [] +10 \N [] +\N \N [] +\N \N [] +\N str_11 [] +\N str_12 [] +12 \N [] +12 \N [] +\N str_13 [] +13 \N [] +13 \N [] +\N str_14 [] +14 \N [] +14 \N [] +\N str_15 [] +15 \N [] +15 \N [] +\N \N [] +\N \N [] +\N str_16 [] +\N str_17 [] +17 \N [] +17 \N [] +\N str_18 [] +18 \N [] +18 \N [] +\N str_19 [] +19 \N [] +19 \N [] +\N \N [20] +\N \N ['str_21','str_21'] +\N \N [22,22,22] +\N \N [23,23,23,23] +\N \N [24,24,24,24,24] +\N \N [25,25,25,25,25,25] +\N \N [26,26,26,26,26,26,26] +\N \N [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [28,28,28,28,28,28,28,28,28] +\N \N [29,29,29,29,29,29,29,29,29,29] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [40] +41 \N [] +\N \N [] +\N str_43 [] +\N \N [44,44,44,44,44] +45 \N [] +\N \N [] +\N str_47 [] +\N \N ['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] +49 \N [] +\N \N [] +\N str_51 [] +\N \N [52,52,52] +53 \N [] +\N \N [] +\N str_55 [] +\N \N [56,56,56,56,56,56,56] +57 \N [] +\N \N [] +\N str_59 [] +\N \N [60] +61 \N [] +\N \N [] +\N str_63 [] +\N \N [64,64,64,64,64] +65 \N [] +\N \N [] +\N str_67 [] +\N \N [68,68,68,68,68,68,68,68,68] +69 \N [] +\N \N [] +\N str_71 [] +\N \N [NULL,NULL,NULL] +73 \N [] +\N \N [] +\N str_75 [] +\N \N [76,76,76,76,76,76,76] +77 \N [] +\N \N [] +\N str_79 [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +0 0 \N [] 0 [] +1 1 \N [] 0 [] +2 2 \N [] 0 [] +3 3 \N [] 0 [] +4 4 \N [] 0 [] +5 5 \N [] 0 [] +6 6 \N [] 0 [] +7 7 \N [] 0 [] +8 8 \N [] 0 [] +9 9 \N [] 0 [] +str_10 \N \N [] 0 [] +10 10 \N [] 0 [] +10 10 \N [] 0 [] +[[0,1]] \N \N [] 0 [] +str_11 \N \N [] 0 [] +str_11 \N \N [] 0 [] +str_12 \N \N [] 0 [] +12 12 \N [] 0 [] +12 12 \N [] 0 [] +str_13 \N \N [] 0 [] +13 13 \N [] 0 [] +13 13 \N [] 0 [] +str_14 \N \N [] 0 [] +14 14 \N [] 0 [] +14 14 \N [] 0 [] +str_15 \N \N [] 0 [] +15 15 \N [] 0 [] +15 15 \N [] 0 [] +[[0,1,2,3,4,5,6]] \N \N [] 0 [] +str_16 \N \N [] 0 [] +str_16 \N \N [] 0 [] +str_17 \N \N [] 0 [] +17 17 \N [] 0 [] +17 17 \N [] 0 [] +str_18 \N \N [] 0 [] +18 18 \N [] 0 [] +18 18 \N [] 0 [] +str_19 \N \N [] 0 [] +19 19 \N [] 0 [] +19 19 \N [] 0 [] +[20] \N \N [20] 1 [20] +['str_21','str_21'] \N \N ['str_21','str_21'] 2 [NULL,NULL] +[22,22,22] \N \N [22,22,22] 3 [22,22,22] +[23,23,23,23] \N \N [23,23,23,23] 4 [23,23,23,23] +[24,24,24,24,24] \N \N [24,24,24,24,24] 5 [24,24,24,24,24] +[25,25,25,25,25,25] \N \N [25,25,25,25,25,25] 6 [25,25,25,25,25,25] +[26,26,26,26,26,26,26] \N \N [26,26,26,26,26,26,26] 7 [26,26,26,26,26,26,26] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] \N \N [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 8 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[28,28,28,28,28,28,28,28,28] \N \N [28,28,28,28,28,28,28,28,28] 9 [28,28,28,28,28,28,28,28,28] +[29,29,29,29,29,29,29,29,29,29] \N \N [29,29,29,29,29,29,29,29,29,29] 10 [29,29,29,29,29,29,29,29,29,29] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +[40] \N \N [40] 1 [40] +41 41 \N [] 0 [] +\N \N \N [] 0 [] +str_43 \N \N [] 0 [] +[44,44,44,44,44] \N \N [44,44,44,44,44] 5 [44,44,44,44,44] +45 45 \N [] 0 [] +\N \N \N [] 0 [] +str_47 \N \N [] 0 [] +['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] \N \N ['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] 9 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +49 49 \N [] 0 [] +\N \N \N [] 0 [] +str_51 \N \N [] 0 [] +[52,52,52] \N \N [52,52,52] 3 [52,52,52] +53 53 \N [] 0 [] +\N \N \N [] 0 [] +str_55 \N \N [] 0 [] +[56,56,56,56,56,56,56] \N \N [56,56,56,56,56,56,56] 7 [56,56,56,56,56,56,56] +57 57 \N [] 0 [] +\N \N \N [] 0 [] +str_59 \N \N [] 0 [] +[60] \N \N [60] 1 [60] +61 61 \N [] 0 [] +\N \N \N [] 0 [] +str_63 \N \N [] 0 [] +[64,64,64,64,64] \N \N [64,64,64,64,64] 5 [64,64,64,64,64] +65 65 \N [] 0 [] +\N \N \N [] 0 [] +str_67 \N \N [] 0 [] +[68,68,68,68,68,68,68,68,68] \N \N [68,68,68,68,68,68,68,68,68] 9 [68,68,68,68,68,68,68,68,68] +69 69 \N [] 0 [] +\N \N \N [] 0 [] +str_71 \N \N [] 0 [] +[NULL,NULL,NULL] \N \N [NULL,NULL,NULL] 3 [NULL,NULL,NULL] +73 73 \N [] 0 [] +\N \N \N [] 0 [] +str_75 \N \N [] 0 [] +[76,76,76,76,76,76,76] \N \N [76,76,76,76,76,76,76] 7 [76,76,76,76,76,76,76] +77 77 \N [] 0 [] +\N \N \N [] 0 [] +str_79 \N \N [] 0 [] +0 \N [] 0 [] [] +1 \N [] 0 [] [] +2 \N [] 0 [] [] +3 \N [] 0 [] [] +4 \N [] 0 [] [] +5 \N [] 0 [] [] +6 \N [] 0 [] [] +7 \N [] 0 [] [] +8 \N [] 0 [] [] +9 \N [] 0 [] [] +\N \N [] 0 [] [] +10 \N [] 0 [] [] +10 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +12 \N [] 0 [] [] +12 \N [] 0 [] [] +\N \N [] 0 [] [] +13 \N [] 0 [] [] +13 \N [] 0 [] [] +\N \N [] 0 [] [] +14 \N [] 0 [] [] +14 \N [] 0 [] [] +\N \N [] 0 [] [] +15 \N [] 0 [] [] +15 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +17 \N [] 0 [] [] +17 \N [] 0 [] [] +\N \N [] 0 [] [] +18 \N [] 0 [] [] +18 \N [] 0 [] [] +\N \N [] 0 [] [] +19 \N [] 0 [] [] +19 \N [] 0 [] [] +\N \N [20] 1 [20] [NULL] +\N \N ['str_21','str_21'] 2 [NULL,NULL] ['str_21','str_21'] +\N \N [22,22,22] 3 [22,22,22] [NULL,NULL,NULL] +\N \N [23,23,23,23] 4 [23,23,23,23] [NULL,NULL,NULL,NULL] +\N \N [24,24,24,24,24] 5 [24,24,24,24,24] [NULL,NULL,NULL,NULL,NULL] +\N \N [25,25,25,25,25,25] 6 [25,25,25,25,25,25] [NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [26,26,26,26,26,26,26] 7 [26,26,26,26,26,26,26] [NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 8 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [28,28,28,28,28,28,28,28,28] 9 [28,28,28,28,28,28,28,28,28] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [29,29,29,29,29,29,29,29,29,29] 10 [29,29,29,29,29,29,29,29,29,29] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [40] 1 [40] [NULL] +41 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [44,44,44,44,44] 5 [44,44,44,44,44] [NULL,NULL,NULL,NULL,NULL] +45 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N ['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] 9 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] ['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] +49 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [52,52,52] 3 [52,52,52] [NULL,NULL,NULL] +53 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [56,56,56,56,56,56,56] 7 [56,56,56,56,56,56,56] [NULL,NULL,NULL,NULL,NULL,NULL,NULL] +57 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [60] 1 [60] [NULL] +61 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [64,64,64,64,64] 5 [64,64,64,64,64] [NULL,NULL,NULL,NULL,NULL] +65 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [68,68,68,68,68,68,68,68,68] 9 [68,68,68,68,68,68,68,68,68] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +69 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [NULL,NULL,NULL] 3 [NULL,NULL,NULL] [NULL,NULL,NULL] +73 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [76,76,76,76,76,76,76] 7 [76,76,76,76,76,76,76] [NULL,NULL,NULL,NULL,NULL,NULL,NULL] +77 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +0 0 [] [] +1 0 [] [] +2 0 [] [] +3 0 [] [] +4 0 [] [] +5 0 [] [] +6 0 [] [] +7 0 [] [] +8 0 [] [] +9 0 [] [] +str_10 0 [] [] +10 0 [] [] +10 0 [] [] +[[0,1]] 0 [] [] +str_11 0 [] [] +str_11 0 [] [] +str_12 0 [] [] +12 0 [] [] +12 0 [] [] +str_13 0 [] [] +13 0 [] [] +13 0 [] [] +str_14 0 [] [] +14 0 [] [] +14 0 [] [] +str_15 0 [] [] +15 0 [] [] +15 0 [] [] +[[0,1,2,3,4,5,6]] 0 [] [] +str_16 0 [] [] +str_16 0 [] [] +str_17 0 [] [] +17 0 [] [] +17 0 [] [] +str_18 0 [] [] +18 0 [] [] +18 0 [] [] +str_19 0 [] [] +19 0 [] [] +19 0 [] [] +[20] 0 [] [20] +['str_21','str_21'] 0 [] [NULL,NULL] +[22,22,22] 0 [] [22,22,22] +[23,23,23,23] 0 [] [23,23,23,23] +[24,24,24,24,24] 0 [] [24,24,24,24,24] +[25,25,25,25,25,25] 0 [] [25,25,25,25,25,25] +[26,26,26,26,26,26,26] 0 [] [26,26,26,26,26,26,26] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 0 [] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[28,28,28,28,28,28,28,28,28] 0 [] [28,28,28,28,28,28,28,28,28] +[29,29,29,29,29,29,29,29,29,29] 0 [] [29,29,29,29,29,29,29,29,29,29] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +[40] 0 [] [40] +41 0 [] [] +\N 0 [] [] +str_43 0 [] [] +[44,44,44,44,44] 0 [] [44,44,44,44,44] +45 0 [] [] +\N 0 [] [] +str_47 0 [] [] +['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] 0 [] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +49 0 [] [] +\N 0 [] [] +str_51 0 [] [] +[52,52,52] 0 [] [52,52,52] +53 0 [] [] +\N 0 [] [] +str_55 0 [] [] +[56,56,56,56,56,56,56] 0 [] [56,56,56,56,56,56,56] +57 0 [] [] +\N 0 [] [] +str_59 0 [] [] +[60] 0 [] [60] +61 0 [] [] +\N 0 [] [] +str_63 0 [] [] +[64,64,64,64,64] 0 [] [64,64,64,64,64] +65 0 [] [] +\N 0 [] [] +str_67 0 [] [] +[68,68,68,68,68,68,68,68,68] 0 [] [68,68,68,68,68,68,68,68,68] +69 0 [] [] +\N 0 [] [] +str_71 0 [] [] +[NULL,NULL,NULL] 0 [] [NULL,NULL,NULL] +73 0 [] [] +\N 0 [] [] +str_75 0 [] [] +[76,76,76,76,76,76,76] 0 [] [76,76,76,76,76,76,76] +77 0 [] [] +\N 0 [] [] +str_79 0 [] [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [20] +[] 0 [NULL,NULL] +[] 0 [22,22,22] +[] 0 [23,23,23,23] +[] 0 [24,24,24,24,24] +[] 0 [25,25,25,25,25,25] +[] 0 [26,26,26,26,26,26,26] +[] 0 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[] 0 [28,28,28,28,28,28,28,28,28] +[] 0 [29,29,29,29,29,29,29,29,29,29] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [40] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [44,44,44,44,44] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [52,52,52] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [56,56,56,56,56,56,56] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [60] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [64,64,64,64,64] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [68,68,68,68,68,68,68,68,68] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [NULL,NULL,NULL] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [76,76,76,76,76,76,76] +[] 0 [] +[] 0 [] +[] 0 [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[2] [[0,1]] [[[],[]]] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[7] [[0,1,2,3,4,5,6]] [[[],[],[],[],[],[],[]]] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] diff --git a/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_small.sql.j2 b/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_small.sql.j2 new file mode 100644 index 00000000000..dde4f3f53c3 --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_small.sql.j2 @@ -0,0 +1,47 @@ +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; +set allow_experimental_dynamic_type = 1; + +drop table if exists test; + +{% for engine in ['Memory', 'MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000', 'MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1'] -%} + +select '{{ engine }}'; +create table test (id UInt64, d Dynamic(max_types=2)) engine={{ engine }}; + +insert into test select number, number from numbers(10); +insert into test select number, 'str_' || toString(number) from numbers(10, 10); +insert into test select number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1)) from numbers(20, 10); +insert into test select number, NULL from numbers(30, 10); +insert into test select number, multiIf(number % 4 == 3, 'str_' || toString(number), number % 4 == 2, NULL, number % 4 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1))) from numbers(40, 40); +insert into test select number, if(number % 5 == 1, [range((number % 10 + 1)::UInt64)]::Array(Array(Dynamic)), number) from numbers(10, 10); +insert into test select number, if(number % 5 == 1, ('str_' || number)::LowCardinality(String)::Dynamic, number::Dynamic) from numbers(10, 10); + +select distinct dynamicType(d) as type from test order by type; +select count() from test where dynamicType(d) == 'UInt64'; +select count() from test where d.UInt64 is not NULL; +select count() from test where dynamicType(d) == 'String'; +select count() from test where d.String is not NULL; +select count() from test where dynamicType(d) == 'Date'; +select count() from test where d.Date is not NULL; +select count() from test where dynamicType(d) == 'LowCardinality(String)'; +select count() from test where d.`LowCardinality(String)` is not NULL; +select count() from test where dynamicType(d) == 'Array(Variant(String, UInt64))'; +select count() from test where not empty(d.`Array(Variant(String, UInt64))`); +select count() from test where dynamicType(d) == 'Array(Array(Dynamic))'; +select count() from test where not empty(d.`Array(Array(Dynamic))`); +select count() from test where d is NULL; +select count() from test where not empty(d.`Tuple(a Array(Dynamic))`.a.String); + +select d, d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test order by id, d; +select d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test order by id, d; +select d.Int8, d.Date, d.`Array(String)` from test order by id, d; +select d, d.UInt64, d.Date, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test order by id, d; +select d.UInt64, d.Date, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64, d.`Array(Variant(String, UInt64))`.String from test order by id, d; +select d, d.`Tuple(a UInt64, b String)`.a, d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Variant(String, UInt64))`.UInt64 from test order by id, d; +select d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Dynamic)`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test order by id, d; +select d.`Array(Array(Dynamic))`.size1, d.`Array(Array(Dynamic))`.UInt64, d.`Array(Array(Dynamic))`.`Map(String, Tuple(a UInt64))`.values.a from test order by id, d; + +drop table test; + +{% endfor -%} diff --git a/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_wide_merge_tree.reference b/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_wide_merge_tree.reference new file mode 100644 index 00000000000..ca6c5dbba82 --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_wide_merge_tree.reference @@ -0,0 +1,20 @@ +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +LowCardinality(String) +None +String +UInt64 +360000 +360000 +200000 +200000 +0 +0 +20000 +20000 +200000 +200000 +20000 +20000 +200000 +0 diff --git a/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_wide_merge_tree.sql b/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_wide_merge_tree.sql new file mode 100644 index 00000000000..61dc8fca01a --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_wide_merge_tree.sql @@ -0,0 +1,43 @@ +-- Tags: long, no-tsan, no-msan, no-ubsan, no-asan + +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; +set allow_experimental_dynamic_type = 1; + +drop table if exists test; +create table test (id UInt64, d Dynamic(max_types=2)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1; + +insert into test select number, number from numbers(100000) settings min_insert_block_size_rows=50000; +insert into test select number, 'str_' || toString(number) from numbers(100000, 100000) settings min_insert_block_size_rows=50000; +insert into test select number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1)) from numbers(200000, 100000) settings min_insert_block_size_rows=50000; +insert into test select number, NULL from numbers(300000, 100000) settings min_insert_block_size_rows=50000; +insert into test select number, multiIf(number % 4 == 3, 'str_' || toString(number), number % 4 == 2, NULL, number % 4 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1))) from numbers(400000, 400000) settings min_insert_block_size_rows=50000; +insert into test select number, if (number % 5 == 1, [range((number % 10 + 1)::UInt64)]::Array(Array(Dynamic)), number) from numbers(100000, 100000) settings min_insert_block_size_rows=50000; +insert into test select number, if (number % 5 == 1, ('str_' || number)::LowCardinality(String)::Dynamic, number::Dynamic) from numbers(100000, 100000) settings min_insert_block_size_rows=50000; + +select distinct dynamicType(d) as type from test order by type; +select count() from test where dynamicType(d) == 'UInt64'; +select count() from test where d.UInt64 is not NULL; +select count() from test where dynamicType(d) == 'String'; +select count() from test where d.String is not NULL; +select count() from test where dynamicType(d) == 'Date'; +select count() from test where d.Date is not NULL; +select count() from test where dynamicType(d) == 'LowCardinality(String)'; +select count() from test where d.`LowCardinality(String)` is not NULL; +select count() from test where dynamicType(d) == 'Array(Variant(String, UInt64))'; +select count() from test where not empty(d.`Array(Variant(String, UInt64))`); +select count() from test where dynamicType(d) == 'Array(Array(Dynamic))'; +select count() from test where not empty(d.`Array(Array(Dynamic))`); +select count() from test where d is NULL; +select count() from test where not empty(d.`Tuple(a Array(Dynamic))`.a.String); + +select d, d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test format Null; +select d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test format Null; +select d.Int8, d.Date, d.`Array(String)` from test format Null; +select d, d.UInt64, d.Date, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null; +select d.UInt64, d.Date, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64, d.`Array(Variant(String, UInt64))`.String from test format Null; +select d, d.`Tuple(a UInt64, b String)`.a, d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null; +select d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Dynamic)`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null; +select d.`Array(Array(Dynamic))`.size1, d.`Array(Array(Dynamic))`.UInt64, d.`Array(Array(Dynamic))`.`Map(String, Tuple(a UInt64))`.values.a from test format Null; + +drop table test; diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_compact_merge_tree.sql b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_compact_merge_tree.sql index ddfba4418bd..822393d3c78 100644 --- a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_compact_merge_tree.sql @@ -1,4 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan +-- Random settings limits: index_granularity=(100, None) set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_wide_merge_tree.sql b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_wide_merge_tree.sql index 5aac5f7b72f..2394893dc8b 100644 --- a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_wide_merge_tree.sql +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_wide_merge_tree.sql @@ -1,4 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan +-- Random settings limits: index_granularity=(100, None) set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.reference b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.reference index d0d777a5a38..b0be05f07a2 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.reference +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.reference @@ -1,28 +1,66 @@ -50000 DateTime -60000 Date -70000 Array(UInt16) -80000 String -100000 None -100000 UInt64 -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -200000 Map(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -10000 Tuple(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -270000 String +50000 DateTime false +60000 Date false +70000 Array(UInt16) false +80000 String false +100000 None false +100000 UInt64 false +--------------------- +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String false +100000 None false +100000 UInt64 false +--------------------- +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String false +100000 None false +100000 UInt64 false +200000 Map(UInt64, UInt64) false +--------------------- +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String true +100000 None false +100000 UInt64 false +200000 Map(UInt64, UInt64) false +--------------------- +10000 Tuple(UInt64, UInt64) false +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String true +100000 None false +100000 UInt64 false +200000 Map(UInt64, UInt64) false +--------------------- +10000 Tuple(UInt64, UInt64) true +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String true +100000 None false +100000 UInt64 false +200000 Map(UInt64, UInt64) false +--------------------- +10000 Tuple(UInt64, UInt64) true +30000 String false +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String true +100000 None false +100000 UInt64 false +200000 Map(UInt64, UInt64) false +--------------------- +10000 Tuple(UInt64, UInt64) true +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +100000 None false +100000 UInt64 true +110000 String false +200000 Map(UInt64, UInt64) false diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql index d2c787040e5..9bd2aee06ae 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql @@ -1,8 +1,10 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) + set allow_experimental_dynamic_type=1; drop table if exists test; -create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_columns_to_activate=10, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760, lock_acquire_timeout_for_background_operations=600; +create table test (id UInt64, d Dynamic(max_types=2)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_columns_to_activate=10, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760, lock_acquire_timeout_for_background_operations=600; system stop merges test; insert into test select number, number from numbers(100000); @@ -12,22 +14,37 @@ insert into test select number, toDate(number) from numbers(60000); insert into test select number, toDateTime(number) from numbers(50000); insert into test select number, NULL from numbers(100000); -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system start merges test; optimize table test final;; -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system stop merges test; insert into test select number, map(number, number) from numbers(200000); -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system start merges test; optimize table test final; -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system stop merges test; insert into test select number, tuple(number, number) from numbers(10000); -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system start merges test; optimize table test final; -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); + +system stop merges test; +insert into test select number, 'str_' || number from numbers(30000); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); +system start merges test; +optimize table test final; +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); + drop table test; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.reference b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.reference index d0d777a5a38..b0be05f07a2 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.reference +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.reference @@ -1,28 +1,66 @@ -50000 DateTime -60000 Date -70000 Array(UInt16) -80000 String -100000 None -100000 UInt64 -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -200000 Map(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -10000 Tuple(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -270000 String +50000 DateTime false +60000 Date false +70000 Array(UInt16) false +80000 String false +100000 None false +100000 UInt64 false +--------------------- +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String false +100000 None false +100000 UInt64 false +--------------------- +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String false +100000 None false +100000 UInt64 false +200000 Map(UInt64, UInt64) false +--------------------- +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String true +100000 None false +100000 UInt64 false +200000 Map(UInt64, UInt64) false +--------------------- +10000 Tuple(UInt64, UInt64) false +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String true +100000 None false +100000 UInt64 false +200000 Map(UInt64, UInt64) false +--------------------- +10000 Tuple(UInt64, UInt64) true +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String true +100000 None false +100000 UInt64 false +200000 Map(UInt64, UInt64) false +--------------------- +10000 Tuple(UInt64, UInt64) true +30000 String false +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String true +100000 None false +100000 UInt64 false +200000 Map(UInt64, UInt64) false +--------------------- +10000 Tuple(UInt64, UInt64) true +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +100000 None false +100000 UInt64 true +110000 String false +200000 Map(UInt64, UInt64) false diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql index f99bf771608..ee2dadd308c 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql @@ -1,8 +1,10 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) + set allow_experimental_dynamic_type=1; drop table if exists test; -create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_columns_to_activate=10, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760, lock_acquire_timeout_for_background_operations=600; +create table test (id UInt64, d Dynamic(max_types=2)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_columns_to_activate=10, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760, lock_acquire_timeout_for_background_operations=600; system stop merges test; insert into test select number, number from numbers(100000); @@ -12,22 +14,36 @@ insert into test select number, toDate(number) from numbers(60000); insert into test select number, toDateTime(number) from numbers(50000); insert into test select number, NULL from numbers(100000); -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system start merges test; optimize table test final;; -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system stop merges test; insert into test select number, map(number, number) from numbers(200000); -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system start merges test; optimize table test final; -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system stop merges test; insert into test select number, tuple(number, number) from numbers(10000); -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system start merges test; optimize table test final; -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); + +system stop merges test; +insert into test select number, 'str_' || number from numbers(30000); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); +system start merges test; +optimize table test final; +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); drop table test; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.reference b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.reference index d0d777a5a38..b0be05f07a2 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.reference +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.reference @@ -1,28 +1,66 @@ -50000 DateTime -60000 Date -70000 Array(UInt16) -80000 String -100000 None -100000 UInt64 -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -200000 Map(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -10000 Tuple(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -270000 String +50000 DateTime false +60000 Date false +70000 Array(UInt16) false +80000 String false +100000 None false +100000 UInt64 false +--------------------- +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String false +100000 None false +100000 UInt64 false +--------------------- +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String false +100000 None false +100000 UInt64 false +200000 Map(UInt64, UInt64) false +--------------------- +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String true +100000 None false +100000 UInt64 false +200000 Map(UInt64, UInt64) false +--------------------- +10000 Tuple(UInt64, UInt64) false +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String true +100000 None false +100000 UInt64 false +200000 Map(UInt64, UInt64) false +--------------------- +10000 Tuple(UInt64, UInt64) true +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String true +100000 None false +100000 UInt64 false +200000 Map(UInt64, UInt64) false +--------------------- +10000 Tuple(UInt64, UInt64) true +30000 String false +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String true +100000 None false +100000 UInt64 false +200000 Map(UInt64, UInt64) false +--------------------- +10000 Tuple(UInt64, UInt64) true +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +100000 None false +100000 UInt64 true +110000 String false +200000 Map(UInt64, UInt64) false diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql index be81596d043..6c2ce8f9e6a 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql @@ -1,8 +1,10 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) + set allow_experimental_dynamic_type=1; drop table if exists test; -create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760, lock_acquire_timeout_for_background_operations=600; +create table test (id UInt64, d Dynamic(max_types=2)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760, lock_acquire_timeout_for_background_operations=600; system stop merges test; insert into test select number, number from numbers(100000); @@ -12,23 +14,36 @@ insert into test select number, toDate(number) from numbers(60000); insert into test select number, toDateTime(number) from numbers(50000); insert into test select number, NULL from numbers(100000); -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); -system start merges test; -optimize table test final; -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); +system start merges test; optimize table test final;; +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system stop merges test; insert into test select number, map(number, number) from numbers(200000); -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system start merges test; optimize table test final; -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system stop merges test; insert into test select number, tuple(number, number) from numbers(10000); -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system start merges test; optimize table test final; -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); + +system stop merges test; +insert into test select number, 'str_' || number from numbers(30000); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); +system start merges test; +optimize table test final; +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); drop table test; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.reference b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.reference index d0d777a5a38..b0be05f07a2 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.reference +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.reference @@ -1,28 +1,66 @@ -50000 DateTime -60000 Date -70000 Array(UInt16) -80000 String -100000 None -100000 UInt64 -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -200000 Map(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -10000 Tuple(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -270000 String +50000 DateTime false +60000 Date false +70000 Array(UInt16) false +80000 String false +100000 None false +100000 UInt64 false +--------------------- +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String false +100000 None false +100000 UInt64 false +--------------------- +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String false +100000 None false +100000 UInt64 false +200000 Map(UInt64, UInt64) false +--------------------- +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String true +100000 None false +100000 UInt64 false +200000 Map(UInt64, UInt64) false +--------------------- +10000 Tuple(UInt64, UInt64) false +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String true +100000 None false +100000 UInt64 false +200000 Map(UInt64, UInt64) false +--------------------- +10000 Tuple(UInt64, UInt64) true +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String true +100000 None false +100000 UInt64 false +200000 Map(UInt64, UInt64) false +--------------------- +10000 Tuple(UInt64, UInt64) true +30000 String false +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String true +100000 None false +100000 UInt64 false +200000 Map(UInt64, UInt64) false +--------------------- +10000 Tuple(UInt64, UInt64) true +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +100000 None false +100000 UInt64 true +110000 String false +200000 Map(UInt64, UInt64) false diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql index f6396af42a8..2350cddd21c 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql @@ -1,8 +1,10 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) + set allow_experimental_dynamic_type=1; drop table if exists test; -create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760, lock_acquire_timeout_for_background_operations=600; +create table test (id UInt64, d Dynamic(max_types=2)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760, lock_acquire_timeout_for_background_operations=600; system stop merges test; insert into test select number, number from numbers(100000); @@ -12,22 +14,36 @@ insert into test select number, toDate(number) from numbers(60000); insert into test select number, toDateTime(number) from numbers(50000); insert into test select number, NULL from numbers(100000); -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system start merges test; optimize table test final;; -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system stop merges test; insert into test select number, map(number, number) from numbers(200000); -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system start merges test; optimize table test final; -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system stop merges test; insert into test select number, tuple(number, number) from numbers(10000); -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system start merges test; optimize table test final; -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); + +system stop merges test; +insert into test select number, 'str_' || number from numbers(30000); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); +system start merges test; +optimize table test final; +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); drop table test; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.sql index fa64ed2f8fd..7f1934091f2 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.sql @@ -1,4 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) set allow_experimental_dynamic_type = 1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.sql index 4b8a036f166..f1f387fae9d 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.sql @@ -1,4 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) set allow_experimental_dynamic_type = 1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.sql index a4e67de76db..cc11c454d38 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.sql @@ -1,4 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) set allow_experimental_dynamic_type = 1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.sql index dd643f8dffd..ffb2aca8b35 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.sql @@ -1,4 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) set allow_experimental_dynamic_type = 1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_small.reference.j2 b/tests/queries/0_stateless/03037_dynamic_merges_small.reference.j2 index 96a854630ed..7d3bc371e36 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_small.reference.j2 +++ b/tests/queries/0_stateless/03037_dynamic_merges_small.reference.j2 @@ -1,112 +1,264 @@ -5 DateTime -6 Date -7 Array(UInt16) -8 String -10 None -10 UInt64 -7 Array(UInt16) -10 None -10 UInt64 -19 String -7 Array(UInt16) -10 None -10 UInt64 -19 String -20 Map(UInt64, UInt64) -10 None -10 UInt64 -20 Map(UInt64, UInt64) -26 String -1 Tuple(UInt64, UInt64) -10 None -10 UInt64 -20 Map(UInt64, UInt64) -26 String -10 None -10 UInt64 -20 Map(UInt64, UInt64) -27 String -5 DateTime -6 Date -7 Array(UInt16) -8 String -10 None -10 UInt64 -7 Array(UInt16) -10 None -10 UInt64 -19 String -7 Array(UInt16) -10 None -10 UInt64 -19 String -20 Map(UInt64, UInt64) -10 None -10 UInt64 -20 Map(UInt64, UInt64) -26 String -1 Tuple(UInt64, UInt64) -10 None -10 UInt64 -20 Map(UInt64, UInt64) -26 String -10 None -10 UInt64 -20 Map(UInt64, UInt64) -27 String -5 DateTime -6 Date -7 Array(UInt16) -8 String -10 None -10 UInt64 -7 Array(UInt16) -10 None -10 UInt64 -19 String -7 Array(UInt16) -10 None -10 UInt64 -19 String -20 Map(UInt64, UInt64) -10 None -10 UInt64 -20 Map(UInt64, UInt64) -26 String -1 Tuple(UInt64, UInt64) -10 None -10 UInt64 -20 Map(UInt64, UInt64) -26 String -10 None -10 UInt64 -20 Map(UInt64, UInt64) -27 String -5 DateTime -6 Date -7 Array(UInt16) -8 String -10 None -10 UInt64 -7 Array(UInt16) -10 None -10 UInt64 -19 String -7 Array(UInt16) -10 None -10 UInt64 -19 String -20 Map(UInt64, UInt64) -10 None -10 UInt64 -20 Map(UInt64, UInt64) -26 String -1 Tuple(UInt64, UInt64) -10 None -10 UInt64 -20 Map(UInt64, UInt64) -26 String -10 None -10 UInt64 -20 Map(UInt64, UInt64) -27 String +5 DateTime false +6 Date false +7 Array(UInt16) false +8 String false +10 None false +10 UInt64 false +--------------------- +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String false +10 None false +10 UInt64 false +--------------------- +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String false +10 None false +10 UInt64 false +20 Map(UInt64, UInt64) false +--------------------- +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String true +10 None false +10 UInt64 false +20 Map(UInt64, UInt64) false +--------------------- +1 Tuple(UInt64, UInt64) false +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String true +10 None false +10 UInt64 false +20 Map(UInt64, UInt64) false +--------------------- +1 Tuple(UInt64, UInt64) true +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String true +10 None false +10 UInt64 false +20 Map(UInt64, UInt64) false +--------------------- +1 Tuple(UInt64, UInt64) true +3 String false +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String true +10 None false +10 UInt64 false +20 Map(UInt64, UInt64) false +--------------------- +1 Tuple(UInt64, UInt64) true +5 DateTime true +6 Date true +7 Array(UInt16) true +10 None false +10 UInt64 true +11 String false +20 Map(UInt64, UInt64) false +5 DateTime false +6 Date false +7 Array(UInt16) false +8 String false +10 None false +10 UInt64 false +--------------------- +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String false +10 None false +10 UInt64 false +--------------------- +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String false +10 None false +10 UInt64 false +20 Map(UInt64, UInt64) false +--------------------- +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String true +10 None false +10 UInt64 false +20 Map(UInt64, UInt64) false +--------------------- +1 Tuple(UInt64, UInt64) false +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String true +10 None false +10 UInt64 false +20 Map(UInt64, UInt64) false +--------------------- +1 Tuple(UInt64, UInt64) true +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String true +10 None false +10 UInt64 false +20 Map(UInt64, UInt64) false +--------------------- +1 Tuple(UInt64, UInt64) true +3 String false +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String true +10 None false +10 UInt64 false +20 Map(UInt64, UInt64) false +--------------------- +1 Tuple(UInt64, UInt64) true +5 DateTime true +6 Date true +7 Array(UInt16) true +10 None false +10 UInt64 true +11 String false +20 Map(UInt64, UInt64) false +5 DateTime false +6 Date false +7 Array(UInt16) false +8 String false +10 None false +10 UInt64 false +--------------------- +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String false +10 None false +10 UInt64 false +--------------------- +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String false +10 None false +10 UInt64 false +20 Map(UInt64, UInt64) false +--------------------- +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String true +10 None false +10 UInt64 false +20 Map(UInt64, UInt64) false +--------------------- +1 Tuple(UInt64, UInt64) false +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String true +10 None false +10 UInt64 false +20 Map(UInt64, UInt64) false +--------------------- +1 Tuple(UInt64, UInt64) true +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String true +10 None false +10 UInt64 false +20 Map(UInt64, UInt64) false +--------------------- +1 Tuple(UInt64, UInt64) true +3 String false +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String true +10 None false +10 UInt64 false +20 Map(UInt64, UInt64) false +--------------------- +1 Tuple(UInt64, UInt64) true +5 DateTime true +6 Date true +7 Array(UInt16) true +10 None false +10 UInt64 true +11 String false +20 Map(UInt64, UInt64) false +5 DateTime false +6 Date false +7 Array(UInt16) false +8 String false +10 None false +10 UInt64 false +--------------------- +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String false +10 None false +10 UInt64 false +--------------------- +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String false +10 None false +10 UInt64 false +20 Map(UInt64, UInt64) false +--------------------- +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String true +10 None false +10 UInt64 false +20 Map(UInt64, UInt64) false +--------------------- +1 Tuple(UInt64, UInt64) false +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String true +10 None false +10 UInt64 false +20 Map(UInt64, UInt64) false +--------------------- +1 Tuple(UInt64, UInt64) true +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String true +10 None false +10 UInt64 false +20 Map(UInt64, UInt64) false +--------------------- +1 Tuple(UInt64, UInt64) true +3 String false +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String true +10 None false +10 UInt64 false +20 Map(UInt64, UInt64) false +--------------------- +1 Tuple(UInt64, UInt64) true +5 DateTime true +6 Date true +7 Array(UInt16) true +10 None false +10 UInt64 true +11 String false +20 Map(UInt64, UInt64) false diff --git a/tests/queries/0_stateless/03037_dynamic_merges_small.sql.j2 b/tests/queries/0_stateless/03037_dynamic_merges_small.sql.j2 index 263e92be403..71d6fc2540c 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_small.sql.j2 +++ b/tests/queries/0_stateless/03037_dynamic_merges_small.sql.j2 @@ -2,6 +2,7 @@ set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; set allow_experimental_dynamic_type = 1; + drop table if exists test; {% for engine in ['MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000', @@ -9,7 +10,7 @@ drop table if exists test; 'MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1', 'MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1'] -%} -create table test (id UInt64, d Dynamic(max_types=3)) engine={{ engine }}; +create table test (id UInt64, d Dynamic(max_types=2)) engine={{ engine }}; system stop merges test; insert into test select number, number from numbers(10); @@ -19,23 +20,37 @@ insert into test select number, toDate(number) from numbers(6); insert into test select number, toDateTime(number) from numbers(5); insert into test select number, NULL from numbers(10); -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system start merges test; optimize table test final;; -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system stop merges test; insert into test select number, map(number, number) from numbers(20); -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system start merges test; optimize table test final; -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system stop merges test; insert into test select number, tuple(number, number) from numbers(1); -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system start merges test; optimize table test final; -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); + +system stop merges test; +insert into test select number, 'str_' || number from numbers(3); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); +system start merges test; +optimize table test final; +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); drop table test; diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.reference b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.reference index 4be740f6050..a4c2df74a74 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.reference +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.reference @@ -1,21 +1,63 @@ -16667 Tuple(a Dynamic(max_types=3)):Date -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):String -50000 Tuple(a Dynamic(max_types=3)):UInt64 -100000 UInt64:None -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):UInt64 -66667 Tuple(a Dynamic(max_types=3)):String -100000 UInt64:None -16667 Tuple(a Dynamic(max_types=3)):DateTime -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):UInt64 -66667 Tuple(a Dynamic(max_types=3)):String -100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -100000 UInt64:None -133333 Tuple(a Dynamic(max_types=3)):None -50000 Tuple(a Dynamic(max_types=3)):UInt64 -100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -100000 UInt64:None -116667 Tuple(a Dynamic(max_types=3)):String -133333 Tuple(a Dynamic(max_types=3)):None +6667 Tuple(a Dynamic(max_types=2)):DateTime false +13333 Tuple(a Dynamic(max_types=2)):IPv4 false +16667 Tuple(a Dynamic(max_types=2)):Date false +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) false +33334 Tuple(a Dynamic(max_types=2)):UInt64 false +40000 String:None false +66666 Tuple(a Dynamic(max_types=2)):String false +140000 UInt64:None false +--------------------- +6667 Tuple(a Dynamic(max_types=2)):DateTime true +13333 Tuple(a Dynamic(max_types=2)):IPv4 true +16667 Tuple(a Dynamic(max_types=2)):Date true +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +33334 Tuple(a Dynamic(max_types=2)):UInt64 false +40000 String:None false +66666 Tuple(a Dynamic(max_types=2)):String false +140000 UInt64:None false +--------------------- +6667 Tuple(a Dynamic(max_types=2)):DateTime true +13333 Tuple(a Dynamic(max_types=2)):IPv4 true +16667 Tuple(a Dynamic(max_types=2)):Date true +16667 Tuple(a Dynamic(max_types=2)):DateTime false +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +33334 Tuple(a Dynamic(max_types=2)):UInt64 false +40000 String:None false +66666 Tuple(a Dynamic(max_types=2)):String false +100000 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +133333 Tuple(a Dynamic(max_types=2)):None false +140000 UInt64:None false +--------------------- +13333 Tuple(a Dynamic(max_types=2)):IPv4 true +16667 Tuple(a Dynamic(max_types=2)):Date true +23334 Tuple(a Dynamic(max_types=2)):DateTime true +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +33334 Tuple(a Dynamic(max_types=2)):UInt64 true +40000 String:None false +66666 Tuple(a Dynamic(max_types=2)):String false +100000 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +133333 Tuple(a Dynamic(max_types=2)):None false +140000 UInt64:None false +--------------------- +13333 Tuple(a Dynamic(max_types=2)):IPv4 true +16667 Tuple(a Dynamic(max_types=2)):Date true +23334 Tuple(a Dynamic(max_types=2)):DateTime true +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +33334 Tuple(a Dynamic(max_types=2)):UInt64 true +40000 String:None false +40000 Tuple(a Dynamic(max_types=2)):DateTime false +66666 Tuple(a Dynamic(max_types=2)):String false +100000 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +133333 Tuple(a Dynamic(max_types=2)):None false +140000 UInt64:None false +--------------------- +13333 Tuple(a Dynamic(max_types=2)):IPv4 true +16667 Tuple(a Dynamic(max_types=2)):Date true +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +33334 Tuple(a Dynamic(max_types=2)):UInt64 true +40000 String:None false +63334 Tuple(a Dynamic(max_types=2)):DateTime true +66666 Tuple(a Dynamic(max_types=2)):String false +100000 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +133333 Tuple(a Dynamic(max_types=2)):None false +140000 UInt64:None false diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sql b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sql index 81888946681..e3b8ea63582 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sql +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sql @@ -1,4 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; @@ -6,24 +7,39 @@ set allow_experimental_dynamic_type = 1; set enable_named_columns_in_function_tuple = 0; drop table if exists test;; -create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, lock_acquire_timeout_for_background_operations=600; +create table test (id UInt64, d Dynamic(max_types=2)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, lock_acquire_timeout_for_background_operations=600; system stop merges test; insert into test select number, number from numbers(100000); -insert into test select number, tuple(if(number % 2 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(100000); -insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(50000); +insert into test select number, tuple(if(number % 3 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=2)) from numbers(100000); +insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=2)) from numbers(50000); +insert into test select number, multiIf(number % 5 == 0, tuple(if(number % 3 == 0, toDateTime(number), toIPv4(number)))::Tuple(a Dynamic(max_types=2)), number % 5 == 1 or number % 5 == 2, number, 'str_' || number) from numbers(100000); -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; system start merges test; optimize table test final; -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; -insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(50000); -insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(200000); +system stop merges test; +insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=2)) from numbers(50000); +insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=2)) from numbers(200000); -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; system start merges test; optimize table test final; -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; + +system stop merges test; +insert into test select number, tuple(toDateTime(number))::Tuple(a Dynamic(max_types=2)) from numbers(40000); + +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; +system start merges test; +optimize table test final; +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; drop table test; diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.reference b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.reference index 4be740f6050..a4c2df74a74 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.reference +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.reference @@ -1,21 +1,63 @@ -16667 Tuple(a Dynamic(max_types=3)):Date -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):String -50000 Tuple(a Dynamic(max_types=3)):UInt64 -100000 UInt64:None -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):UInt64 -66667 Tuple(a Dynamic(max_types=3)):String -100000 UInt64:None -16667 Tuple(a Dynamic(max_types=3)):DateTime -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):UInt64 -66667 Tuple(a Dynamic(max_types=3)):String -100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -100000 UInt64:None -133333 Tuple(a Dynamic(max_types=3)):None -50000 Tuple(a Dynamic(max_types=3)):UInt64 -100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -100000 UInt64:None -116667 Tuple(a Dynamic(max_types=3)):String -133333 Tuple(a Dynamic(max_types=3)):None +6667 Tuple(a Dynamic(max_types=2)):DateTime false +13333 Tuple(a Dynamic(max_types=2)):IPv4 false +16667 Tuple(a Dynamic(max_types=2)):Date false +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) false +33334 Tuple(a Dynamic(max_types=2)):UInt64 false +40000 String:None false +66666 Tuple(a Dynamic(max_types=2)):String false +140000 UInt64:None false +--------------------- +6667 Tuple(a Dynamic(max_types=2)):DateTime true +13333 Tuple(a Dynamic(max_types=2)):IPv4 true +16667 Tuple(a Dynamic(max_types=2)):Date true +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +33334 Tuple(a Dynamic(max_types=2)):UInt64 false +40000 String:None false +66666 Tuple(a Dynamic(max_types=2)):String false +140000 UInt64:None false +--------------------- +6667 Tuple(a Dynamic(max_types=2)):DateTime true +13333 Tuple(a Dynamic(max_types=2)):IPv4 true +16667 Tuple(a Dynamic(max_types=2)):Date true +16667 Tuple(a Dynamic(max_types=2)):DateTime false +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +33334 Tuple(a Dynamic(max_types=2)):UInt64 false +40000 String:None false +66666 Tuple(a Dynamic(max_types=2)):String false +100000 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +133333 Tuple(a Dynamic(max_types=2)):None false +140000 UInt64:None false +--------------------- +13333 Tuple(a Dynamic(max_types=2)):IPv4 true +16667 Tuple(a Dynamic(max_types=2)):Date true +23334 Tuple(a Dynamic(max_types=2)):DateTime true +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +33334 Tuple(a Dynamic(max_types=2)):UInt64 true +40000 String:None false +66666 Tuple(a Dynamic(max_types=2)):String false +100000 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +133333 Tuple(a Dynamic(max_types=2)):None false +140000 UInt64:None false +--------------------- +13333 Tuple(a Dynamic(max_types=2)):IPv4 true +16667 Tuple(a Dynamic(max_types=2)):Date true +23334 Tuple(a Dynamic(max_types=2)):DateTime true +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +33334 Tuple(a Dynamic(max_types=2)):UInt64 true +40000 String:None false +40000 Tuple(a Dynamic(max_types=2)):DateTime false +66666 Tuple(a Dynamic(max_types=2)):String false +100000 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +133333 Tuple(a Dynamic(max_types=2)):None false +140000 UInt64:None false +--------------------- +13333 Tuple(a Dynamic(max_types=2)):IPv4 true +16667 Tuple(a Dynamic(max_types=2)):Date true +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +33334 Tuple(a Dynamic(max_types=2)):UInt64 true +40000 String:None false +63334 Tuple(a Dynamic(max_types=2)):DateTime true +66666 Tuple(a Dynamic(max_types=2)):String false +100000 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +133333 Tuple(a Dynamic(max_types=2)):None false +140000 UInt64:None false diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sql b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sql index ba58ca471a2..db11dfc93e2 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sql +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sql @@ -1,4 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; @@ -6,24 +7,39 @@ set allow_experimental_dynamic_type = 1; set enable_named_columns_in_function_tuple = 0; drop table if exists test;; -create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1, lock_acquire_timeout_for_background_operations=600; +create table test (id UInt64, d Dynamic(max_types=2)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1, lock_acquire_timeout_for_background_operations=600; system stop merges test; insert into test select number, number from numbers(100000); -insert into test select number, tuple(if(number % 2 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(100000); -insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(50000); +insert into test select number, tuple(if(number % 3 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=2)) from numbers(100000); +insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=2)) from numbers(50000); +insert into test select number, multiIf(number % 5 == 0, tuple(if(number % 3 == 0, toDateTime(number), toIPv4(number)))::Tuple(a Dynamic(max_types=2)), number % 5 == 1 or number % 5 == 2, number, 'str_' || number) from numbers(100000); -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; system start merges test; optimize table test final; -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; -insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(50000); -insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(200000); +system stop merges test; +insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=2)) from numbers(50000); +insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=2)) from numbers(200000); -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; system start merges test; optimize table test final; -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; + +system stop merges test; +insert into test select number, tuple(toDateTime(number))::Tuple(a Dynamic(max_types=2)) from numbers(40000); + +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; +system start merges test; +optimize table test final; +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; drop table test; diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_small.reference.j2 b/tests/queries/0_stateless/03038_nested_dynamic_merges_small.reference.j2 index ae07c164074..3d7e8b60f73 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_small.reference.j2 +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_small.reference.j2 @@ -1,84 +1,256 @@ -2 Tuple(a Dynamic(max_types=3)):Date -3 Tuple(a Dynamic(max_types=3)):Array(UInt8) -5 Tuple(a Dynamic(max_types=3)):String -5 Tuple(a Dynamic(max_types=3)):UInt64 -10 UInt64:None -3 Tuple(a Dynamic(max_types=3)):Array(UInt8) -5 Tuple(a Dynamic(max_types=3)):UInt64 -7 Tuple(a Dynamic(max_types=3)):String -10 UInt64:None -2 Tuple(a Dynamic(max_types=3)):DateTime -3 Tuple(a Dynamic(max_types=3)):Array(UInt8) -5 Tuple(a Dynamic(max_types=3)):UInt64 -7 Tuple(a Dynamic(max_types=3)):String -10 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -10 UInt64:None -13 Tuple(a Dynamic(max_types=3)):None -5 Tuple(a Dynamic(max_types=3)):UInt64 -10 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -10 UInt64:None -12 Tuple(a Dynamic(max_types=3)):String -13 Tuple(a Dynamic(max_types=3)):None -2 Tuple(a Dynamic(max_types=3)):Date -3 Tuple(a Dynamic(max_types=3)):Array(UInt8) -5 Tuple(a Dynamic(max_types=3)):String -5 Tuple(a Dynamic(max_types=3)):UInt64 -10 UInt64:None -3 Tuple(a Dynamic(max_types=3)):Array(UInt8) -5 Tuple(a Dynamic(max_types=3)):UInt64 -7 Tuple(a Dynamic(max_types=3)):String -10 UInt64:None -2 Tuple(a Dynamic(max_types=3)):DateTime -3 Tuple(a Dynamic(max_types=3)):Array(UInt8) -5 Tuple(a Dynamic(max_types=3)):UInt64 -7 Tuple(a Dynamic(max_types=3)):String -10 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -10 UInt64:None -13 Tuple(a Dynamic(max_types=3)):None -5 Tuple(a Dynamic(max_types=3)):UInt64 -10 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -10 UInt64:None -12 Tuple(a Dynamic(max_types=3)):String -13 Tuple(a Dynamic(max_types=3)):None -2 Tuple(a Dynamic(max_types=3)):Date -3 Tuple(a Dynamic(max_types=3)):Array(UInt8) -5 Tuple(a Dynamic(max_types=3)):String -5 Tuple(a Dynamic(max_types=3)):UInt64 -10 UInt64:None -3 Tuple(a Dynamic(max_types=3)):Array(UInt8) -5 Tuple(a Dynamic(max_types=3)):UInt64 -7 Tuple(a Dynamic(max_types=3)):String -10 UInt64:None -2 Tuple(a Dynamic(max_types=3)):DateTime -3 Tuple(a Dynamic(max_types=3)):Array(UInt8) -5 Tuple(a Dynamic(max_types=3)):UInt64 -7 Tuple(a Dynamic(max_types=3)):String -10 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -10 UInt64:None -13 Tuple(a Dynamic(max_types=3)):None -5 Tuple(a Dynamic(max_types=3)):UInt64 -10 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -10 UInt64:None -12 Tuple(a Dynamic(max_types=3)):String -13 Tuple(a Dynamic(max_types=3)):None -2 Tuple(a Dynamic(max_types=3)):Date -3 Tuple(a Dynamic(max_types=3)):Array(UInt8) -5 Tuple(a Dynamic(max_types=3)):String -5 Tuple(a Dynamic(max_types=3)):UInt64 -10 UInt64:None -3 Tuple(a Dynamic(max_types=3)):Array(UInt8) -5 Tuple(a Dynamic(max_types=3)):UInt64 -7 Tuple(a Dynamic(max_types=3)):String -10 UInt64:None -2 Tuple(a Dynamic(max_types=3)):DateTime -3 Tuple(a Dynamic(max_types=3)):Array(UInt8) -5 Tuple(a Dynamic(max_types=3)):UInt64 -7 Tuple(a Dynamic(max_types=3)):String -10 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -10 UInt64:None -13 Tuple(a Dynamic(max_types=3)):None -5 Tuple(a Dynamic(max_types=3)):UInt64 -10 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -10 UInt64:None -12 Tuple(a Dynamic(max_types=3)):String -13 Tuple(a Dynamic(max_types=3)):None +test +2 Tuple(a Dynamic(max_types=2)):DateTime false +2 Tuple(a Dynamic(max_types=2)):IPv4 false +4 Tuple(a Dynamic(max_types=2)):Date false +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) false +7 Tuple(a Dynamic(max_types=2)):UInt64 false +8 String:None false +13 Tuple(a Dynamic(max_types=2)):String false +28 UInt64:None false +--------------------- +2 Tuple(a Dynamic(max_types=2)):DateTime true +2 Tuple(a Dynamic(max_types=2)):IPv4 true +4 Tuple(a Dynamic(max_types=2)):Date true +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +7 Tuple(a Dynamic(max_types=2)):UInt64 false +8 String:None false +13 Tuple(a Dynamic(max_types=2)):String false +28 UInt64:None false +--------------------- +2 Tuple(a Dynamic(max_types=2)):DateTime true +2 Tuple(a Dynamic(max_types=2)):IPv4 true +4 Tuple(a Dynamic(max_types=2)):Date true +4 Tuple(a Dynamic(max_types=2)):DateTime false +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +7 Tuple(a Dynamic(max_types=2)):UInt64 false +8 String:None false +13 Tuple(a Dynamic(max_types=2)):String false +20 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +26 Tuple(a Dynamic(max_types=2)):None false +28 UInt64:None false +--------------------- +2 Tuple(a Dynamic(max_types=2)):IPv4 true +4 Tuple(a Dynamic(max_types=2)):Date true +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +6 Tuple(a Dynamic(max_types=2)):DateTime true +7 Tuple(a Dynamic(max_types=2)):UInt64 true +8 String:None false +13 Tuple(a Dynamic(max_types=2)):String false +20 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +26 Tuple(a Dynamic(max_types=2)):None false +28 UInt64:None false +--------------------- +2 Tuple(a Dynamic(max_types=2)):IPv4 true +4 Tuple(a Dynamic(max_types=2)):Date true +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +6 Tuple(a Dynamic(max_types=2)):DateTime true +7 Tuple(a Dynamic(max_types=2)):UInt64 true +8 String:None false +8 Tuple(a Dynamic(max_types=2)):DateTime false +13 Tuple(a Dynamic(max_types=2)):String false +20 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +26 Tuple(a Dynamic(max_types=2)):None false +28 UInt64:None false +--------------------- +2 Tuple(a Dynamic(max_types=2)):IPv4 true +4 Tuple(a Dynamic(max_types=2)):Date true +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +7 Tuple(a Dynamic(max_types=2)):UInt64 true +8 String:None false +13 Tuple(a Dynamic(max_types=2)):String false +14 Tuple(a Dynamic(max_types=2)):DateTime true +20 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +26 Tuple(a Dynamic(max_types=2)):None false +28 UInt64:None false +test +2 Tuple(a Dynamic(max_types=2)):DateTime false +2 Tuple(a Dynamic(max_types=2)):IPv4 false +4 Tuple(a Dynamic(max_types=2)):Date false +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) false +7 Tuple(a Dynamic(max_types=2)):UInt64 false +8 String:None false +13 Tuple(a Dynamic(max_types=2)):String false +28 UInt64:None false +--------------------- +2 Tuple(a Dynamic(max_types=2)):DateTime true +2 Tuple(a Dynamic(max_types=2)):IPv4 true +4 Tuple(a Dynamic(max_types=2)):Date true +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +7 Tuple(a Dynamic(max_types=2)):UInt64 false +8 String:None false +13 Tuple(a Dynamic(max_types=2)):String false +28 UInt64:None false +--------------------- +2 Tuple(a Dynamic(max_types=2)):DateTime true +2 Tuple(a Dynamic(max_types=2)):IPv4 true +4 Tuple(a Dynamic(max_types=2)):Date true +4 Tuple(a Dynamic(max_types=2)):DateTime false +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +7 Tuple(a Dynamic(max_types=2)):UInt64 false +8 String:None false +13 Tuple(a Dynamic(max_types=2)):String false +20 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +26 Tuple(a Dynamic(max_types=2)):None false +28 UInt64:None false +--------------------- +2 Tuple(a Dynamic(max_types=2)):IPv4 true +4 Tuple(a Dynamic(max_types=2)):Date true +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +6 Tuple(a Dynamic(max_types=2)):DateTime true +7 Tuple(a Dynamic(max_types=2)):UInt64 true +8 String:None false +13 Tuple(a Dynamic(max_types=2)):String false +20 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +26 Tuple(a Dynamic(max_types=2)):None false +28 UInt64:None false +--------------------- +2 Tuple(a Dynamic(max_types=2)):IPv4 true +4 Tuple(a Dynamic(max_types=2)):Date true +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +6 Tuple(a Dynamic(max_types=2)):DateTime true +7 Tuple(a Dynamic(max_types=2)):UInt64 true +8 String:None false +8 Tuple(a Dynamic(max_types=2)):DateTime false +13 Tuple(a Dynamic(max_types=2)):String false +20 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +26 Tuple(a Dynamic(max_types=2)):None false +28 UInt64:None false +--------------------- +2 Tuple(a Dynamic(max_types=2)):IPv4 true +4 Tuple(a Dynamic(max_types=2)):Date true +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +7 Tuple(a Dynamic(max_types=2)):UInt64 true +8 String:None false +13 Tuple(a Dynamic(max_types=2)):String false +14 Tuple(a Dynamic(max_types=2)):DateTime true +20 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +26 Tuple(a Dynamic(max_types=2)):None false +28 UInt64:None false +test +2 Tuple(a Dynamic(max_types=2)):DateTime false +2 Tuple(a Dynamic(max_types=2)):IPv4 false +4 Tuple(a Dynamic(max_types=2)):Date false +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) false +7 Tuple(a Dynamic(max_types=2)):UInt64 false +8 String:None false +13 Tuple(a Dynamic(max_types=2)):String false +28 UInt64:None false +--------------------- +2 Tuple(a Dynamic(max_types=2)):DateTime true +2 Tuple(a Dynamic(max_types=2)):IPv4 true +4 Tuple(a Dynamic(max_types=2)):Date true +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +7 Tuple(a Dynamic(max_types=2)):UInt64 false +8 String:None false +13 Tuple(a Dynamic(max_types=2)):String false +28 UInt64:None false +--------------------- +2 Tuple(a Dynamic(max_types=2)):DateTime true +2 Tuple(a Dynamic(max_types=2)):IPv4 true +4 Tuple(a Dynamic(max_types=2)):Date true +4 Tuple(a Dynamic(max_types=2)):DateTime false +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +7 Tuple(a Dynamic(max_types=2)):UInt64 false +8 String:None false +13 Tuple(a Dynamic(max_types=2)):String false +20 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +26 Tuple(a Dynamic(max_types=2)):None false +28 UInt64:None false +--------------------- +2 Tuple(a Dynamic(max_types=2)):IPv4 true +4 Tuple(a Dynamic(max_types=2)):Date true +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +6 Tuple(a Dynamic(max_types=2)):DateTime true +7 Tuple(a Dynamic(max_types=2)):UInt64 true +8 String:None false +13 Tuple(a Dynamic(max_types=2)):String false +20 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +26 Tuple(a Dynamic(max_types=2)):None false +28 UInt64:None false +--------------------- +2 Tuple(a Dynamic(max_types=2)):IPv4 true +4 Tuple(a Dynamic(max_types=2)):Date true +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +6 Tuple(a Dynamic(max_types=2)):DateTime true +7 Tuple(a Dynamic(max_types=2)):UInt64 true +8 String:None false +8 Tuple(a Dynamic(max_types=2)):DateTime false +13 Tuple(a Dynamic(max_types=2)):String false +20 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +26 Tuple(a Dynamic(max_types=2)):None false +28 UInt64:None false +--------------------- +2 Tuple(a Dynamic(max_types=2)):IPv4 true +4 Tuple(a Dynamic(max_types=2)):Date true +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +7 Tuple(a Dynamic(max_types=2)):UInt64 true +8 String:None false +13 Tuple(a Dynamic(max_types=2)):String true +14 Tuple(a Dynamic(max_types=2)):DateTime false +20 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +26 Tuple(a Dynamic(max_types=2)):None false +28 UInt64:None false +test +2 Tuple(a Dynamic(max_types=2)):DateTime false +2 Tuple(a Dynamic(max_types=2)):IPv4 false +4 Tuple(a Dynamic(max_types=2)):Date false +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) false +7 Tuple(a Dynamic(max_types=2)):UInt64 false +8 String:None false +13 Tuple(a Dynamic(max_types=2)):String false +28 UInt64:None false +--------------------- +2 Tuple(a Dynamic(max_types=2)):DateTime true +2 Tuple(a Dynamic(max_types=2)):IPv4 true +4 Tuple(a Dynamic(max_types=2)):Date true +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +7 Tuple(a Dynamic(max_types=2)):UInt64 false +8 String:None false +13 Tuple(a Dynamic(max_types=2)):String false +28 UInt64:None false +--------------------- +2 Tuple(a Dynamic(max_types=2)):DateTime true +2 Tuple(a Dynamic(max_types=2)):IPv4 true +4 Tuple(a Dynamic(max_types=2)):Date true +4 Tuple(a Dynamic(max_types=2)):DateTime false +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +7 Tuple(a Dynamic(max_types=2)):UInt64 false +8 String:None false +13 Tuple(a Dynamic(max_types=2)):String false +20 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +26 Tuple(a Dynamic(max_types=2)):None false +28 UInt64:None false +--------------------- +2 Tuple(a Dynamic(max_types=2)):IPv4 true +4 Tuple(a Dynamic(max_types=2)):Date true +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +6 Tuple(a Dynamic(max_types=2)):DateTime true +7 Tuple(a Dynamic(max_types=2)):UInt64 true +8 String:None false +13 Tuple(a Dynamic(max_types=2)):String false +20 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +26 Tuple(a Dynamic(max_types=2)):None false +28 UInt64:None false +--------------------- +2 Tuple(a Dynamic(max_types=2)):IPv4 true +4 Tuple(a Dynamic(max_types=2)):Date true +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +6 Tuple(a Dynamic(max_types=2)):DateTime true +7 Tuple(a Dynamic(max_types=2)):UInt64 true +8 String:None false +8 Tuple(a Dynamic(max_types=2)):DateTime false +13 Tuple(a Dynamic(max_types=2)):String false +20 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +26 Tuple(a Dynamic(max_types=2)):None false +28 UInt64:None false +--------------------- +2 Tuple(a Dynamic(max_types=2)):IPv4 true +4 Tuple(a Dynamic(max_types=2)):Date true +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +7 Tuple(a Dynamic(max_types=2)):UInt64 true +8 String:None false +13 Tuple(a Dynamic(max_types=2)):String true +14 Tuple(a Dynamic(max_types=2)):DateTime false +20 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +26 Tuple(a Dynamic(max_types=2)):None false +28 UInt64:None false diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_small.sql.j2 b/tests/queries/0_stateless/03038_nested_dynamic_merges_small.sql.j2 index 7828c2af49c..8682b6cef81 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_small.sql.j2 +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_small.sql.j2 @@ -10,25 +10,41 @@ drop table if exists test; 'MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1', 'MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1'] -%} -create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1, lock_acquire_timeout_for_background_operations=600; +select 'test'; +create table test (id UInt64, d Dynamic(max_types=2)) engine={{ engine }}; system stop merges test; -insert into test select number, number from numbers(10); -insert into test select number, tuple(if(number % 2 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(10); -insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(5); +insert into test select number, number from numbers(20); +insert into test select number, tuple(if(number % 3 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=2)) from numbers(20); +insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=2)) from numbers(10); +insert into test select number, multiIf(number % 5 == 0, tuple(if(number % 3 == 0, toDateTime(number), toIPv4(number)))::Tuple(a Dynamic(max_types=2)), number % 5 == 1 or number % 5 == 2, number, 'str_' || number) from numbers(20); -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; system start merges test; optimize table test final; -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; -insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(5); -insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(20); +system stop merges test; +insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=2)) from numbers(10); +insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=2)) from numbers(40); -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; system start merges test; optimize table test final; -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; + +system stop merges test; +insert into test select number, tuple(toDateTime(number))::Tuple(a Dynamic(max_types=2)) from numbers(8); + +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; +system start merges test; +optimize table test final; +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; drop table test; diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.reference b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.reference index 4be740f6050..a4c2df74a74 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.reference +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.reference @@ -1,21 +1,63 @@ -16667 Tuple(a Dynamic(max_types=3)):Date -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):String -50000 Tuple(a Dynamic(max_types=3)):UInt64 -100000 UInt64:None -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):UInt64 -66667 Tuple(a Dynamic(max_types=3)):String -100000 UInt64:None -16667 Tuple(a Dynamic(max_types=3)):DateTime -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):UInt64 -66667 Tuple(a Dynamic(max_types=3)):String -100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -100000 UInt64:None -133333 Tuple(a Dynamic(max_types=3)):None -50000 Tuple(a Dynamic(max_types=3)):UInt64 -100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -100000 UInt64:None -116667 Tuple(a Dynamic(max_types=3)):String -133333 Tuple(a Dynamic(max_types=3)):None +6667 Tuple(a Dynamic(max_types=2)):DateTime false +13333 Tuple(a Dynamic(max_types=2)):IPv4 false +16667 Tuple(a Dynamic(max_types=2)):Date false +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) false +33334 Tuple(a Dynamic(max_types=2)):UInt64 false +40000 String:None false +66666 Tuple(a Dynamic(max_types=2)):String false +140000 UInt64:None false +--------------------- +6667 Tuple(a Dynamic(max_types=2)):DateTime true +13333 Tuple(a Dynamic(max_types=2)):IPv4 true +16667 Tuple(a Dynamic(max_types=2)):Date true +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +33334 Tuple(a Dynamic(max_types=2)):UInt64 false +40000 String:None false +66666 Tuple(a Dynamic(max_types=2)):String false +140000 UInt64:None false +--------------------- +6667 Tuple(a Dynamic(max_types=2)):DateTime true +13333 Tuple(a Dynamic(max_types=2)):IPv4 true +16667 Tuple(a Dynamic(max_types=2)):Date true +16667 Tuple(a Dynamic(max_types=2)):DateTime false +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +33334 Tuple(a Dynamic(max_types=2)):UInt64 false +40000 String:None false +66666 Tuple(a Dynamic(max_types=2)):String false +100000 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +133333 Tuple(a Dynamic(max_types=2)):None false +140000 UInt64:None false +--------------------- +13333 Tuple(a Dynamic(max_types=2)):IPv4 true +16667 Tuple(a Dynamic(max_types=2)):Date true +23334 Tuple(a Dynamic(max_types=2)):DateTime true +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +33334 Tuple(a Dynamic(max_types=2)):UInt64 true +40000 String:None false +66666 Tuple(a Dynamic(max_types=2)):String false +100000 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +133333 Tuple(a Dynamic(max_types=2)):None false +140000 UInt64:None false +--------------------- +13333 Tuple(a Dynamic(max_types=2)):IPv4 true +16667 Tuple(a Dynamic(max_types=2)):Date true +23334 Tuple(a Dynamic(max_types=2)):DateTime true +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +33334 Tuple(a Dynamic(max_types=2)):UInt64 true +40000 String:None false +40000 Tuple(a Dynamic(max_types=2)):DateTime false +66666 Tuple(a Dynamic(max_types=2)):String false +100000 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +133333 Tuple(a Dynamic(max_types=2)):None false +140000 UInt64:None false +--------------------- +13333 Tuple(a Dynamic(max_types=2)):IPv4 true +16667 Tuple(a Dynamic(max_types=2)):Date true +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +33334 Tuple(a Dynamic(max_types=2)):UInt64 true +40000 String:None false +63334 Tuple(a Dynamic(max_types=2)):DateTime true +66666 Tuple(a Dynamic(max_types=2)):String false +100000 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +133333 Tuple(a Dynamic(max_types=2)):None false +140000 UInt64:None false diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sql b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sql index a53c5b0b2a5..4ed4d00fe95 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sql +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sql @@ -1,4 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; @@ -6,24 +7,39 @@ set allow_experimental_dynamic_type = 1; set enable_named_columns_in_function_tuple = 0; drop table if exists test;; -create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, lock_acquire_timeout_for_background_operations=600; +create table test (id UInt64, d Dynamic(max_types=2)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, lock_acquire_timeout_for_background_operations=600; system stop merges test; insert into test select number, number from numbers(100000); -insert into test select number, tuple(if(number % 2 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(100000); -insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(50000); +insert into test select number, tuple(if(number % 3 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=2)) from numbers(100000); +insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=2)) from numbers(50000); +insert into test select number, multiIf(number % 5 == 0, tuple(if(number % 3 == 0, toDateTime(number), toIPv4(number)))::Tuple(a Dynamic(max_types=2)), number % 5 == 1 or number % 5 == 2, number, 'str_' || number) from numbers(100000); -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; system start merges test; optimize table test final; -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; -insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(50000); -insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(200000); +system stop merges test; +insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=2)) from numbers(50000); +insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=2)) from numbers(200000); -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; system start merges test; optimize table test final; -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; + +system stop merges test; +insert into test select number, tuple(toDateTime(number))::Tuple(a Dynamic(max_types=2)) from numbers(40000); + +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; +system start merges test; +optimize table test final; +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; drop table test; diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.reference b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.reference index 4be740f6050..a4c2df74a74 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.reference +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.reference @@ -1,21 +1,63 @@ -16667 Tuple(a Dynamic(max_types=3)):Date -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):String -50000 Tuple(a Dynamic(max_types=3)):UInt64 -100000 UInt64:None -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):UInt64 -66667 Tuple(a Dynamic(max_types=3)):String -100000 UInt64:None -16667 Tuple(a Dynamic(max_types=3)):DateTime -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):UInt64 -66667 Tuple(a Dynamic(max_types=3)):String -100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -100000 UInt64:None -133333 Tuple(a Dynamic(max_types=3)):None -50000 Tuple(a Dynamic(max_types=3)):UInt64 -100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -100000 UInt64:None -116667 Tuple(a Dynamic(max_types=3)):String -133333 Tuple(a Dynamic(max_types=3)):None +6667 Tuple(a Dynamic(max_types=2)):DateTime false +13333 Tuple(a Dynamic(max_types=2)):IPv4 false +16667 Tuple(a Dynamic(max_types=2)):Date false +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) false +33334 Tuple(a Dynamic(max_types=2)):UInt64 false +40000 String:None false +66666 Tuple(a Dynamic(max_types=2)):String false +140000 UInt64:None false +--------------------- +6667 Tuple(a Dynamic(max_types=2)):DateTime true +13333 Tuple(a Dynamic(max_types=2)):IPv4 true +16667 Tuple(a Dynamic(max_types=2)):Date true +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +33334 Tuple(a Dynamic(max_types=2)):UInt64 false +40000 String:None false +66666 Tuple(a Dynamic(max_types=2)):String false +140000 UInt64:None false +--------------------- +6667 Tuple(a Dynamic(max_types=2)):DateTime true +13333 Tuple(a Dynamic(max_types=2)):IPv4 true +16667 Tuple(a Dynamic(max_types=2)):Date true +16667 Tuple(a Dynamic(max_types=2)):DateTime false +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +33334 Tuple(a Dynamic(max_types=2)):UInt64 false +40000 String:None false +66666 Tuple(a Dynamic(max_types=2)):String false +100000 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +133333 Tuple(a Dynamic(max_types=2)):None false +140000 UInt64:None false +--------------------- +13333 Tuple(a Dynamic(max_types=2)):IPv4 true +16667 Tuple(a Dynamic(max_types=2)):Date true +23334 Tuple(a Dynamic(max_types=2)):DateTime true +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +33334 Tuple(a Dynamic(max_types=2)):UInt64 true +40000 String:None false +66666 Tuple(a Dynamic(max_types=2)):String false +100000 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +133333 Tuple(a Dynamic(max_types=2)):None false +140000 UInt64:None false +--------------------- +13333 Tuple(a Dynamic(max_types=2)):IPv4 true +16667 Tuple(a Dynamic(max_types=2)):Date true +23334 Tuple(a Dynamic(max_types=2)):DateTime true +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +33334 Tuple(a Dynamic(max_types=2)):UInt64 true +40000 String:None false +40000 Tuple(a Dynamic(max_types=2)):DateTime false +66666 Tuple(a Dynamic(max_types=2)):String false +100000 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +133333 Tuple(a Dynamic(max_types=2)):None false +140000 UInt64:None false +--------------------- +13333 Tuple(a Dynamic(max_types=2)):IPv4 true +16667 Tuple(a Dynamic(max_types=2)):Date true +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +33334 Tuple(a Dynamic(max_types=2)):UInt64 true +40000 String:None false +63334 Tuple(a Dynamic(max_types=2)):DateTime true +66666 Tuple(a Dynamic(max_types=2)):String false +100000 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +133333 Tuple(a Dynamic(max_types=2)):None false +140000 UInt64:None false diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sql b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sql index 4256b010ec0..2f8b258ba8e 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sql +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sql @@ -1,4 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; @@ -6,24 +7,39 @@ set allow_experimental_dynamic_type = 1; set enable_named_columns_in_function_tuple = 0; drop table if exists test;; -create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1, lock_acquire_timeout_for_background_operations=600; +create table test (id UInt64, d Dynamic(max_types=2)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1, lock_acquire_timeout_for_background_operations=600; system stop merges test; insert into test select number, number from numbers(100000); -insert into test select number, tuple(if(number % 2 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(100000); -insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(50000); +insert into test select number, tuple(if(number % 3 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=2)) from numbers(100000); +insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=2)) from numbers(50000); +insert into test select number, multiIf(number % 5 == 0, tuple(if(number % 3 == 0, toDateTime(number), toIPv4(number)))::Tuple(a Dynamic(max_types=2)), number % 5 == 1 or number % 5 == 2, number, 'str_' || number) from numbers(100000); -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; system start merges test; optimize table test final; -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; -insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(50000); -insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(200000); +system stop merges test; +insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=2)) from numbers(50000); +insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=2)) from numbers(200000); -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; system start merges test; optimize table test final; -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; + +system stop merges test; +insert into test select number, tuple(toDateTime(number))::Tuple(a Dynamic(max_types=2)) from numbers(40000); + +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; +system start merges test; +optimize table test final; +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; drop table test; diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_1_compact_merge_tree.reference b/tests/queries/0_stateless/03040_dynamic_type_alters_1_compact_merge_tree.reference index 2ec301b747b..9386548c74d 100644 --- a/tests/queries/0_stateless/03040_dynamic_type_alters_1_compact_merge_tree.reference +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_1_compact_merge_tree.reference @@ -1,13 +1,13 @@ initial insert alter add column 1 -3 None +3 None false 0 0 \N \N \N 0 1 1 \N \N \N 0 2 2 \N \N \N 0 insert after alter add column 1 -4 String -4 UInt64 -7 None +4 String false +4 UInt64 false +7 None false 0 0 \N \N \N \N 0 1 1 \N \N \N \N 0 2 2 \N \N \N \N 0 @@ -24,147 +24,158 @@ insert after alter add column 1 13 13 str_13 str_13 \N \N 0 14 14 \N \N \N \N 0 alter modify column 1 -7 None -8 String +4 String true +4 UInt64 true +7 None false 0 0 \N \N \N \N 0 1 1 \N \N \N \N 0 2 2 \N \N \N \N 0 -3 3 3 3 \N \N 0 -4 4 4 4 \N \N 0 -5 5 5 5 \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 6 6 str_6 str_6 \N \N 0 7 7 str_7 str_7 \N \N 0 8 8 str_8 str_8 \N \N 0 9 9 \N \N \N \N 0 10 10 \N \N \N \N 0 11 11 \N \N \N \N 0 -12 12 12 12 \N \N 0 +12 12 12 \N 12 \N 0 13 13 str_13 str_13 \N \N 0 14 14 \N \N \N \N 0 insert after alter modify column 1 -8 None -11 String +1 Date true +5 String true +5 UInt64 true +8 None false 0 0 \N \N \N \N 0 1 1 \N \N \N \N 0 2 2 \N \N \N \N 0 -3 3 3 3 \N \N 0 -4 4 4 4 \N \N 0 -5 5 5 5 \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 6 6 str_6 str_6 \N \N 0 7 7 str_7 str_7 \N \N 0 8 8 str_8 str_8 \N \N 0 9 9 \N \N \N \N 0 10 10 \N \N \N \N 0 11 11 \N \N \N \N 0 -12 12 12 12 \N \N 0 +12 12 12 \N 12 \N 0 13 13 str_13 str_13 \N \N 0 14 14 \N \N \N \N 0 15 15 \N \N \N \N 0 -16 16 16 16 \N \N 0 +16 16 16 \N 16 \N 0 17 17 str_17 str_17 \N \N 0 -18 18 1970-01-19 1970-01-19 \N \N 0 +18 18 1970-01-19 \N \N 1970-01-19 0 alter modify column 2 -8 None -11 String +1 Date true +5 String true +5 UInt64 true +8 None false 0 0 \N \N \N \N 0 1 1 \N \N \N \N 0 2 2 \N \N \N \N 0 -3 3 3 3 \N \N 0 -4 4 4 4 \N \N 0 -5 5 5 5 \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 6 6 str_6 str_6 \N \N 0 7 7 str_7 str_7 \N \N 0 8 8 str_8 str_8 \N \N 0 9 9 \N \N \N \N 0 10 10 \N \N \N \N 0 11 11 \N \N \N \N 0 -12 12 12 12 \N \N 0 +12 12 12 \N 12 \N 0 13 13 str_13 str_13 \N \N 0 14 14 \N \N \N \N 0 15 15 \N \N \N \N 0 -16 16 16 16 \N \N 0 +16 16 16 \N 16 \N 0 17 17 str_17 str_17 \N \N 0 -18 18 1970-01-19 1970-01-19 \N \N 0 +18 18 1970-01-19 \N \N 1970-01-19 0 insert after alter modify column 2 -1 Date -1 UInt64 -9 None -12 String +1 String false +1 UInt64 false +2 Date true +5 String true +5 UInt64 true +9 None false 0 0 \N \N \N \N 0 1 1 \N \N \N \N 0 2 2 \N \N \N \N 0 -3 3 3 3 \N \N 0 -4 4 4 4 \N \N 0 -5 5 5 5 \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 6 6 str_6 str_6 \N \N 0 7 7 str_7 str_7 \N \N 0 8 8 str_8 str_8 \N \N 0 9 9 \N \N \N \N 0 10 10 \N \N \N \N 0 11 11 \N \N \N \N 0 -12 12 12 12 \N \N 0 +12 12 12 \N 12 \N 0 13 13 str_13 str_13 \N \N 0 14 14 \N \N \N \N 0 15 15 \N \N \N \N 0 -16 16 16 16 \N \N 0 +16 16 16 \N 16 \N 0 17 17 str_17 str_17 \N \N 0 -18 18 1970-01-19 1970-01-19 \N \N 0 +18 18 1970-01-19 \N \N 1970-01-19 0 19 19 \N \N \N \N 0 20 20 20 \N 20 \N 0 21 21 str_21 str_21 \N \N 0 22 22 1970-01-23 \N \N 1970-01-23 0 alter modify column 3 -1 Date -1 UInt64 -9 None -12 String +1 String false +1 UInt64 false +2 Date true +5 String true +5 UInt64 true +9 None false 0 0 0 \N 0 \N \N \N 0 1 1 1 \N 0 \N \N \N 0 2 2 2 \N 0 \N \N \N 0 -3 3 3 \N 0 3 \N \N 0 -4 4 4 \N 0 4 \N \N 0 -5 5 5 \N 0 5 \N \N 0 +3 3 3 \N 0 \N 3 \N 0 +4 4 4 \N 0 \N 4 \N 0 +5 5 5 \N 0 \N 5 \N 0 6 6 6 \N 0 str_6 \N \N 0 7 7 7 \N 0 str_7 \N \N 0 8 8 8 \N 0 str_8 \N \N 0 9 9 9 \N 0 \N \N \N 0 10 10 10 \N 0 \N \N \N 0 11 11 11 \N 0 \N \N \N 0 -12 12 12 \N 0 12 \N \N 0 +12 12 12 \N 0 \N 12 \N 0 13 13 13 \N 0 str_13 \N \N 0 14 14 14 \N 0 \N \N \N 0 15 15 15 \N 0 \N \N \N 0 -16 16 16 \N 0 16 \N \N 0 +16 16 16 \N 0 \N 16 \N 0 17 17 17 \N 0 str_17 \N \N 0 -18 18 18 \N 0 1970-01-19 \N \N 0 +18 18 18 \N 0 \N \N 1970-01-19 0 19 19 19 \N 0 \N \N \N 0 20 20 20 \N 0 \N 20 \N 0 21 21 21 \N 0 str_21 \N \N 0 22 22 22 \N 0 \N \N 1970-01-23 0 insert after alter modify column 3 -1 Date -1 UInt64 -12 None -12 String +1 String false +1 UInt64 false +2 Date true +5 String true +5 UInt64 true +12 None false 0 0 0 \N 0 \N \N \N 0 1 1 1 \N 0 \N \N \N 0 2 2 2 \N 0 \N \N \N 0 -3 3 3 \N 0 3 \N \N 0 -4 4 4 \N 0 4 \N \N 0 -5 5 5 \N 0 5 \N \N 0 +3 3 3 \N 0 \N 3 \N 0 +4 4 4 \N 0 \N 4 \N 0 +5 5 5 \N 0 \N 5 \N 0 6 6 6 \N 0 str_6 \N \N 0 7 7 7 \N 0 str_7 \N \N 0 8 8 8 \N 0 str_8 \N \N 0 9 9 9 \N 0 \N \N \N 0 10 10 10 \N 0 \N \N \N 0 11 11 11 \N 0 \N \N \N 0 -12 12 12 \N 0 12 \N \N 0 +12 12 12 \N 0 \N 12 \N 0 13 13 13 \N 0 str_13 \N \N 0 14 14 14 \N 0 \N \N \N 0 15 15 15 \N 0 \N \N \N 0 -16 16 16 \N 0 16 \N \N 0 +16 16 16 \N 0 \N 16 \N 0 17 17 17 \N 0 str_17 \N \N 0 -18 18 18 \N 0 1970-01-19 \N \N 0 +18 18 18 \N 0 \N \N 1970-01-19 0 19 19 19 \N 0 \N \N \N 0 20 20 20 \N 0 \N 20 \N 0 21 21 21 \N 0 str_21 \N \N 0 diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_1_compact_merge_tree.sql b/tests/queries/0_stateless/03040_dynamic_type_alters_1_compact_merge_tree.sql index 4ab700306d4..de05ba36177 100644 --- a/tests/queries/0_stateless/03040_dynamic_type_alters_1_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_1_compact_merge_tree.sql @@ -9,7 +9,7 @@ insert into test select number, number from numbers(3); select 'alter add column 1'; alter table test add column d Dynamic(max_types=3) settings mutations_sync=1; -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); select x, y, d, d.String, d.UInt64, d.`Tuple(a UInt64)`.a from test order by x; select 'insert after alter add column 1'; @@ -17,37 +17,37 @@ insert into test select number, number, number from numbers(3, 3); insert into test select number, number, 'str_' || toString(number) from numbers(6, 3); insert into test select number, number, NULL from numbers(9, 3); insert into test select number, number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL) from numbers(12, 3); -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); select x, y, d, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; select 'alter modify column 1'; -alter table test modify column d Dynamic(max_types=1) settings mutations_sync=1; -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +alter table test modify column d Dynamic(max_types=0) settings mutations_sync=1; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); select x, y, d, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; select 'insert after alter modify column 1'; insert into test select number, number, multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, toDate(number), NULL) from numbers(15, 4); -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); select x, y, d, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; select 'alter modify column 2'; -alter table test modify column d Dynamic(max_types=3) settings mutations_sync=1; -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +alter table test modify column d Dynamic(max_types=2) settings mutations_sync=1; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); select x, y, d, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; select 'insert after alter modify column 2'; insert into test select number, number, multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, toDate(number), NULL) from numbers(19, 4); -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); select x, y, d, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; select 'alter modify column 3'; alter table test modify column y Dynamic settings mutations_sync=1; -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); select x, y, y.UInt64, y.String, y.`Tuple(a UInt64)`.a, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; select 'insert after alter modify column 3'; insert into test select number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL), NULL from numbers(23, 3); -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); select x, y, y.UInt64, y.String, y.`Tuple(a UInt64)`.a, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; drop table test; \ No newline at end of file diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_1_memory.reference b/tests/queries/0_stateless/03040_dynamic_type_alters_1_memory.reference index c592528c3cd..d7123288280 100644 --- a/tests/queries/0_stateless/03040_dynamic_type_alters_1_memory.reference +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_1_memory.reference @@ -24,48 +24,28 @@ insert after alter add column 1 13 13 str_13 str_13 \N \N 0 14 14 \N \N \N \N 0 alter modify column 1 +4 String +4 UInt64 7 None -8 String 0 0 \N \N \N \N 0 1 1 \N \N \N \N 0 2 2 \N \N \N \N 0 -3 3 3 3 \N \N 0 -4 4 4 4 \N \N 0 -5 5 5 5 \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 6 6 str_6 str_6 \N \N 0 7 7 str_7 str_7 \N \N 0 8 8 str_8 str_8 \N \N 0 9 9 \N \N \N \N 0 10 10 \N \N \N \N 0 11 11 \N \N \N \N 0 -12 12 12 12 \N \N 0 +12 12 12 \N 12 \N 0 13 13 str_13 str_13 \N \N 0 14 14 \N \N \N \N 0 insert after alter modify column 1 -8 None -11 String -0 0 \N \N \N \N 0 -1 1 \N \N \N \N 0 -2 2 \N \N \N \N 0 -3 3 3 3 \N \N 0 -4 4 4 4 \N \N 0 -5 5 5 5 \N \N 0 -6 6 str_6 str_6 \N \N 0 -7 7 str_7 str_7 \N \N 0 -8 8 str_8 str_8 \N \N 0 -9 9 \N \N \N \N 0 -10 10 \N \N \N \N 0 -11 11 \N \N \N \N 0 -12 12 12 12 \N \N 0 -13 13 str_13 str_13 \N \N 0 -14 14 \N \N \N \N 0 -15 15 \N \N \N \N 0 -16 16 16 16 \N \N 0 -17 17 str_17 str_17 \N \N 0 -18 18 1970-01-19 1970-01-19 \N \N 0 -alter modify column 2 -4 UInt64 -7 String +1 Date +5 String +5 UInt64 8 None 0 0 \N \N \N \N 0 1 1 \N \N \N \N 0 @@ -83,13 +63,37 @@ alter modify column 2 13 13 str_13 str_13 \N \N 0 14 14 \N \N \N \N 0 15 15 \N \N \N \N 0 -16 16 16 16 \N \N 0 +16 16 16 \N 16 \N 0 17 17 str_17 str_17 \N \N 0 -18 18 1970-01-19 1970-01-19 \N \N 0 -insert after alter modify column 2 +18 18 1970-01-19 \N \N 1970-01-19 0 +alter modify column 2 1 Date +5 String 5 UInt64 -8 String +8 None +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 \N 16 \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 \N \N 1970-01-19 0 +insert after alter modify column 2 +2 Date +6 String +6 UInt64 9 None 0 0 \N \N \N \N 0 1 1 \N \N \N \N 0 @@ -107,17 +111,17 @@ insert after alter modify column 2 13 13 str_13 str_13 \N \N 0 14 14 \N \N \N \N 0 15 15 \N \N \N \N 0 -16 16 16 16 \N \N 0 +16 16 16 \N 16 \N 0 17 17 str_17 str_17 \N \N 0 -18 18 1970-01-19 1970-01-19 \N \N 0 +18 18 1970-01-19 \N \N 1970-01-19 0 19 19 \N \N \N \N 0 20 20 20 \N 20 \N 0 21 21 str_21 str_21 \N \N 0 22 22 1970-01-23 \N \N 1970-01-23 0 alter modify column 3 -1 Date -5 UInt64 -8 String +2 Date +6 String +6 UInt64 9 None 0 0 0 \N 0 \N \N \N 0 1 1 1 \N 0 \N \N \N 0 @@ -135,17 +139,17 @@ alter modify column 3 13 13 13 \N 0 str_13 \N \N 0 14 14 14 \N 0 \N \N \N 0 15 15 15 \N 0 \N \N \N 0 -16 16 16 \N 0 16 \N \N 0 +16 16 16 \N 0 \N 16 \N 0 17 17 17 \N 0 str_17 \N \N 0 -18 18 18 \N 0 1970-01-19 \N \N 0 +18 18 18 \N 0 \N \N 1970-01-19 0 19 19 19 \N 0 \N \N \N 0 20 20 20 \N 0 \N 20 \N 0 21 21 21 \N 0 str_21 \N \N 0 22 22 22 \N 0 \N \N 1970-01-23 0 insert after alter modify column 3 -1 Date -5 UInt64 -8 String +2 Date +6 String +6 UInt64 12 None 0 0 0 \N 0 \N \N \N 0 1 1 1 \N 0 \N \N \N 0 @@ -163,9 +167,9 @@ insert after alter modify column 3 13 13 13 \N 0 str_13 \N \N 0 14 14 14 \N 0 \N \N \N 0 15 15 15 \N 0 \N \N \N 0 -16 16 16 \N 0 16 \N \N 0 +16 16 16 \N 0 \N 16 \N 0 17 17 17 \N 0 str_17 \N \N 0 -18 18 18 \N 0 1970-01-19 \N \N 0 +18 18 18 \N 0 \N \N 1970-01-19 0 19 19 19 \N 0 \N \N \N 0 20 20 20 \N 0 \N 20 \N 0 21 21 21 \N 0 str_21 \N \N 0 diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_1_wide_merge_tree.reference b/tests/queries/0_stateless/03040_dynamic_type_alters_1_wide_merge_tree.reference index 2ec301b747b..d7123288280 100644 --- a/tests/queries/0_stateless/03040_dynamic_type_alters_1_wide_merge_tree.reference +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_1_wide_merge_tree.reference @@ -24,147 +24,152 @@ insert after alter add column 1 13 13 str_13 str_13 \N \N 0 14 14 \N \N \N \N 0 alter modify column 1 +4 String +4 UInt64 7 None -8 String 0 0 \N \N \N \N 0 1 1 \N \N \N \N 0 2 2 \N \N \N \N 0 -3 3 3 3 \N \N 0 -4 4 4 4 \N \N 0 -5 5 5 5 \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 6 6 str_6 str_6 \N \N 0 7 7 str_7 str_7 \N \N 0 8 8 str_8 str_8 \N \N 0 9 9 \N \N \N \N 0 10 10 \N \N \N \N 0 11 11 \N \N \N \N 0 -12 12 12 12 \N \N 0 +12 12 12 \N 12 \N 0 13 13 str_13 str_13 \N \N 0 14 14 \N \N \N \N 0 insert after alter modify column 1 -8 None -11 String -0 0 \N \N \N \N 0 -1 1 \N \N \N \N 0 -2 2 \N \N \N \N 0 -3 3 3 3 \N \N 0 -4 4 4 4 \N \N 0 -5 5 5 5 \N \N 0 -6 6 str_6 str_6 \N \N 0 -7 7 str_7 str_7 \N \N 0 -8 8 str_8 str_8 \N \N 0 -9 9 \N \N \N \N 0 -10 10 \N \N \N \N 0 -11 11 \N \N \N \N 0 -12 12 12 12 \N \N 0 -13 13 str_13 str_13 \N \N 0 -14 14 \N \N \N \N 0 -15 15 \N \N \N \N 0 -16 16 16 16 \N \N 0 -17 17 str_17 str_17 \N \N 0 -18 18 1970-01-19 1970-01-19 \N \N 0 -alter modify column 2 -8 None -11 String -0 0 \N \N \N \N 0 -1 1 \N \N \N \N 0 -2 2 \N \N \N \N 0 -3 3 3 3 \N \N 0 -4 4 4 4 \N \N 0 -5 5 5 5 \N \N 0 -6 6 str_6 str_6 \N \N 0 -7 7 str_7 str_7 \N \N 0 -8 8 str_8 str_8 \N \N 0 -9 9 \N \N \N \N 0 -10 10 \N \N \N \N 0 -11 11 \N \N \N \N 0 -12 12 12 12 \N \N 0 -13 13 str_13 str_13 \N \N 0 -14 14 \N \N \N \N 0 -15 15 \N \N \N \N 0 -16 16 16 16 \N \N 0 -17 17 str_17 str_17 \N \N 0 -18 18 1970-01-19 1970-01-19 \N \N 0 -insert after alter modify column 2 1 Date -1 UInt64 -9 None -12 String +5 String +5 UInt64 +8 None 0 0 \N \N \N \N 0 1 1 \N \N \N \N 0 2 2 \N \N \N \N 0 -3 3 3 3 \N \N 0 -4 4 4 4 \N \N 0 -5 5 5 5 \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 6 6 str_6 str_6 \N \N 0 7 7 str_7 str_7 \N \N 0 8 8 str_8 str_8 \N \N 0 9 9 \N \N \N \N 0 10 10 \N \N \N \N 0 11 11 \N \N \N \N 0 -12 12 12 12 \N \N 0 +12 12 12 \N 12 \N 0 13 13 str_13 str_13 \N \N 0 14 14 \N \N \N \N 0 15 15 \N \N \N \N 0 -16 16 16 16 \N \N 0 +16 16 16 \N 16 \N 0 17 17 str_17 str_17 \N \N 0 -18 18 1970-01-19 1970-01-19 \N \N 0 +18 18 1970-01-19 \N \N 1970-01-19 0 +alter modify column 2 +1 Date +5 String +5 UInt64 +8 None +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 \N 16 \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 \N \N 1970-01-19 0 +insert after alter modify column 2 +2 Date +6 String +6 UInt64 +9 None +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 \N 16 \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 \N \N 1970-01-19 0 19 19 \N \N \N \N 0 20 20 20 \N 20 \N 0 21 21 str_21 str_21 \N \N 0 22 22 1970-01-23 \N \N 1970-01-23 0 alter modify column 3 -1 Date -1 UInt64 +2 Date +6 String +6 UInt64 9 None -12 String 0 0 0 \N 0 \N \N \N 0 1 1 1 \N 0 \N \N \N 0 2 2 2 \N 0 \N \N \N 0 -3 3 3 \N 0 3 \N \N 0 -4 4 4 \N 0 4 \N \N 0 -5 5 5 \N 0 5 \N \N 0 +3 3 3 \N 0 \N 3 \N 0 +4 4 4 \N 0 \N 4 \N 0 +5 5 5 \N 0 \N 5 \N 0 6 6 6 \N 0 str_6 \N \N 0 7 7 7 \N 0 str_7 \N \N 0 8 8 8 \N 0 str_8 \N \N 0 9 9 9 \N 0 \N \N \N 0 10 10 10 \N 0 \N \N \N 0 11 11 11 \N 0 \N \N \N 0 -12 12 12 \N 0 12 \N \N 0 +12 12 12 \N 0 \N 12 \N 0 13 13 13 \N 0 str_13 \N \N 0 14 14 14 \N 0 \N \N \N 0 15 15 15 \N 0 \N \N \N 0 -16 16 16 \N 0 16 \N \N 0 +16 16 16 \N 0 \N 16 \N 0 17 17 17 \N 0 str_17 \N \N 0 -18 18 18 \N 0 1970-01-19 \N \N 0 +18 18 18 \N 0 \N \N 1970-01-19 0 19 19 19 \N 0 \N \N \N 0 20 20 20 \N 0 \N 20 \N 0 21 21 21 \N 0 str_21 \N \N 0 22 22 22 \N 0 \N \N 1970-01-23 0 insert after alter modify column 3 -1 Date -1 UInt64 +2 Date +6 String +6 UInt64 12 None -12 String 0 0 0 \N 0 \N \N \N 0 1 1 1 \N 0 \N \N \N 0 2 2 2 \N 0 \N \N \N 0 -3 3 3 \N 0 3 \N \N 0 -4 4 4 \N 0 4 \N \N 0 -5 5 5 \N 0 5 \N \N 0 +3 3 3 \N 0 \N 3 \N 0 +4 4 4 \N 0 \N 4 \N 0 +5 5 5 \N 0 \N 5 \N 0 6 6 6 \N 0 str_6 \N \N 0 7 7 7 \N 0 str_7 \N \N 0 8 8 8 \N 0 str_8 \N \N 0 9 9 9 \N 0 \N \N \N 0 10 10 10 \N 0 \N \N \N 0 11 11 11 \N 0 \N \N \N 0 -12 12 12 \N 0 12 \N \N 0 +12 12 12 \N 0 \N 12 \N 0 13 13 13 \N 0 str_13 \N \N 0 14 14 14 \N 0 \N \N \N 0 15 15 15 \N 0 \N \N \N 0 -16 16 16 \N 0 16 \N \N 0 +16 16 16 \N 0 \N 16 \N 0 17 17 17 \N 0 str_17 \N \N 0 -18 18 18 \N 0 1970-01-19 \N \N 0 +18 18 18 \N 0 \N \N 1970-01-19 0 19 19 19 \N 0 \N \N \N 0 20 20 20 \N 0 \N 20 \N 0 21 21 21 \N 0 str_21 \N \N 0 diff --git a/tests/queries/0_stateless/03041_dynamic_type_check_table.sh b/tests/queries/0_stateless/03041_dynamic_type_check_table.sh index c8bd533e253..da24b892cbd 100755 --- a/tests/queries/0_stateless/03041_dynamic_type_check_table.sh +++ b/tests/queries/0_stateless/03041_dynamic_type_check_table.sh @@ -13,7 +13,7 @@ function run() $CH_CLIENT -q "insert into test select number, number from numbers(3)" echo "alter add column" - $CH_CLIENT -q "alter table test add column d Dynamic(max_types=3) settings mutations_sync=1" + $CH_CLIENT -q "alter table test add column d Dynamic(max_types=2) settings mutations_sync=1" $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.\`Tuple(a UInt64)\`.a from test order by x" diff --git a/tests/queries/0_stateless/03096_http_interface_role_query_param.sh b/tests/queries/0_stateless/03096_http_interface_role_query_param.sh index 5ae2d5f2f18..db42ee6bb41 100755 --- a/tests/queries/0_stateless/03096_http_interface_role_query_param.sh +++ b/tests/queries/0_stateless/03096_http_interface_role_query_param.sh @@ -21,7 +21,7 @@ CHANGED_SETTING_VALUE="42" SHOW_CURRENT_ROLES_QUERY="SELECT role_name FROM system.current_roles ORDER BY role_name ASC" SHOW_CHANGED_SETTINGS_QUERY="SELECT name, value FROM system.settings WHERE changed = 1 AND name = '$CHANGED_SETTING_NAME' ORDER BY name ASC" -$CLICKHOUSE_CLIENT -n --query " +$CLICKHOUSE_CLIENT --query " DROP USER IF EXISTS $TEST_USER; DROP ROLE IF EXISTS $TEST_ROLE1; DROP ROLE IF EXISTS $TEST_ROLE2; @@ -94,7 +94,7 @@ OUT=$($CLICKHOUSE_CURL -u $TEST_USER_AUTH -sS "$CLICKHOUSE_URL&role=$TEST_ROLE1& echo -ne $OUT | grep -o "Code: 512" || echo "expected code 512, got: $OUT" echo -ne $OUT | grep -o "SET_NON_GRANTED_ROLE" || echo "expected SET_NON_GRANTED_ROLE error, got: $OUT" -$CLICKHOUSE_CLIENT -n --query " +$CLICKHOUSE_CLIENT --query " DROP USER $TEST_USER; DROP ROLE $TEST_ROLE1; DROP ROLE $TEST_ROLE2; diff --git a/tests/queries/0_stateless/03130_convert_outer_join_to_inner_join.sql b/tests/queries/0_stateless/03130_convert_outer_join_to_inner_join.sql index 4e42d94fd79..b3d1827d98f 100644 --- a/tests/queries/0_stateless/03130_convert_outer_join_to_inner_join.sql +++ b/tests/queries/0_stateless/03130_convert_outer_join_to_inner_join.sql @@ -6,14 +6,18 @@ CREATE TABLE test_table_1 ( id UInt64, value String -) ENGINE=MergeTree ORDER BY id; +) ENGINE=MergeTree ORDER BY id +SETTINGS index_granularity = 16 # We have number of granules in the `EXPLAIN` output in reference file +; DROP TABLE IF EXISTS test_table_2; CREATE TABLE test_table_2 ( id UInt64, value String -) ENGINE=MergeTree ORDER BY id; +) ENGINE=MergeTree ORDER BY id +SETTINGS index_granularity = 16 +; INSERT INTO test_table_1 VALUES (1, 'Value_1'), (2, 'Value_2'); INSERT INTO test_table_2 VALUES (2, 'Value_2'), (3, 'Value_3'); diff --git a/tests/queries/0_stateless/03140_client_subsequent_external_tables.sh b/tests/queries/0_stateless/03140_client_subsequent_external_tables.sh index af75bf42172..64398a515f5 100755 --- a/tests/queries/0_stateless/03140_client_subsequent_external_tables.sh +++ b/tests/queries/0_stateless/03140_client_subsequent_external_tables.sh @@ -7,7 +7,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) INPUT_FILE=$CUR_DIR/$CLICKHOUSE_DATABASE.tsv echo "foo" > "$INPUT_FILE" -$CLICKHOUSE_CLIENT --external --file="$INPUT_FILE" --name=t --structure='x String' -nm -q " +$CLICKHOUSE_CLIENT --external --file="$INPUT_FILE" --name=t --structure='x String' -m -q " select * from t; select * from t; " diff --git a/tests/queries/0_stateless/03143_asof_join_ddb_long.reference b/tests/queries/0_stateless/03143_asof_join_ddb_long.reference index 2850a8aba98..ae7f7c805f2 100644 --- a/tests/queries/0_stateless/03143_asof_join_ddb_long.reference +++ b/tests/queries/0_stateless/03143_asof_join_ddb_long.reference @@ -1,2 +1,2 @@ -49999983751397 10000032 -49999983751397 10000032 +7999995751397 4000032 +7999995751397 4000032 diff --git a/tests/queries/0_stateless/03143_asof_join_ddb_long.sql b/tests/queries/0_stateless/03143_asof_join_ddb_long.sql index 4b211a6a1e1..c93e6618ba9 100644 --- a/tests/queries/0_stateless/03143_asof_join_ddb_long.sql +++ b/tests/queries/0_stateless/03143_asof_join_ddb_long.sql @@ -12,7 +12,7 @@ AS toDateTime('1990-03-21 13:00:00') + INTERVAL number MINUTE AS begin, number % 4 AS key, number AS value - FROM numbers(0, 10000000); + FROM numbers(0, 4000000); CREATE TABLE skewed_probe ENGINE = MergeTree ORDER BY (key, begin) AS @@ -34,8 +34,9 @@ AS SELECT toDateTime('1990-03-21 13:00:01') + INTERVAL number MINUTE AS begin, 3 AS key - FROM numbers(0, 10000000); + FROM numbers(0, 4000000); +SET max_rows_to_read = 0; SELECT SUM(value), COUNT(*) FROM skewed_probe diff --git a/tests/queries/0_stateless/03143_prewhere_profile_events.sh b/tests/queries/0_stateless/03143_prewhere_profile_events.sh index 00daa0fe7cc..6a6b993e5f8 100755 --- a/tests/queries/0_stateless/03143_prewhere_profile_events.sh +++ b/tests/queries/0_stateless/03143_prewhere_profile_events.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -nq " +${CLICKHOUSE_CLIENT} -q " DROP TABLE IF EXISTS t; CREATE TABLE t(a UInt32, b UInt32, c UInt32, d UInt32) ENGINE=MergeTree ORDER BY a SETTINGS min_bytes_for_wide_part=0, min_rows_for_wide_part=0; @@ -25,7 +25,7 @@ client_opts=( --max_threads 8 ) -${CLICKHOUSE_CLIENT} "${client_opts[@]}" --query_id "$query_id_1" -nq " +${CLICKHOUSE_CLIENT} "${client_opts[@]}" --query_id "$query_id_1" -q " SELECT * FROM t PREWHERE (b % 8192) = 42 @@ -33,7 +33,7 @@ PREWHERE (b % 8192) = 42 FORMAT Null " -${CLICKHOUSE_CLIENT} "${client_opts[@]}" --query_id "$query_id_2" -nq " +${CLICKHOUSE_CLIENT} "${client_opts[@]}" --query_id "$query_id_2" -q " SELECT * FROM t PREWHERE (b % 8192) = 42 AND (c % 8192) = 42 @@ -42,7 +42,7 @@ PREWHERE (b % 8192) = 42 AND (c % 8192) = 42 settings enable_multiple_prewhere_read_steps=1; " -${CLICKHOUSE_CLIENT} "${client_opts[@]}" --query_id "$query_id_3" -nq " +${CLICKHOUSE_CLIENT} "${client_opts[@]}" --query_id "$query_id_3" -q " SELECT * FROM t PREWHERE (b % 8192) = 42 AND (c % 16384) = 42 @@ -51,7 +51,7 @@ PREWHERE (b % 8192) = 42 AND (c % 16384) = 42 settings enable_multiple_prewhere_read_steps=0; " -${CLICKHOUSE_CLIENT} "${client_opts[@]}" --query_id "$query_id_4" -nq " +${CLICKHOUSE_CLIENT} "${client_opts[@]}" --query_id "$query_id_4" -q " SELECT b, c FROM t PREWHERE (b % 8192) = 42 AND (c % 8192) = 42 @@ -59,7 +59,7 @@ PREWHERE (b % 8192) = 42 AND (c % 8192) = 42 settings enable_multiple_prewhere_read_steps=1; " -${CLICKHOUSE_CLIENT} -nq " +${CLICKHOUSE_CLIENT} -q " SYSTEM FLUSH LOGS; -- 52503 which is 43 * number of granules, 10000000 diff --git a/tests/queries/0_stateless/03145_non_loaded_projection_backup.sh b/tests/queries/0_stateless/03145_non_loaded_projection_backup.sh index 95aef9bbc5b..4e7b318e202 100755 --- a/tests/queries/0_stateless/03145_non_loaded_projection_backup.sh +++ b/tests/queries/0_stateless/03145_non_loaded_projection_backup.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " drop table if exists tp_1; create table tp_1 (x Int32, y Int32, projection p (select x, y order by x)) engine = MergeTree order by y partition by intDiv(y, 100) settings max_parts_to_merge_at_once=1; insert into tp_1 select number, number from numbers(3); @@ -25,7 +25,7 @@ alter table tp_1 drop projection pp; alter table tp_1 attach partition '0'; " -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " set send_logs_level='fatal'; check table tp_1 settings check_query_single_value_result = 0;" | grep -o "Found unexpected projection directories: pp.proj" @@ -34,19 +34,19 @@ $CLICKHOUSE_CLIENT -q " backup table tp_1 to Disk('backups', '$backup_id'); " | grep -o "BACKUP_CREATED" -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " set send_logs_level='fatal'; drop table tp_1; restore table tp_1 from Disk('backups', '$backup_id'); " | grep -o "RESTORED" $CLICKHOUSE_CLIENT -q "select count() from tp_1;" -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " set send_logs_level='fatal'; check table tp_1 settings check_query_single_value_result = 0;" | grep -o "Found unexpected projection directories: pp.proj" -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " set send_logs_level='fatal'; check table tp_1" -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " set send_logs_level='fatal'; drop table tp_1" diff --git a/tests/queries/0_stateless/03149_numbers_max_block_size_zero.sh b/tests/queries/0_stateless/03149_numbers_max_block_size_zero.sh index 6f70a0d2536..7f606d889a7 100755 --- a/tests/queries/0_stateless/03149_numbers_max_block_size_zero.sh +++ b/tests/queries/0_stateless/03149_numbers_max_block_size_zero.sh @@ -1,7 +1,9 @@ #!/usr/bin/env bash +# shellcheck disable=SC2266 CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -q "SELECT count(*) FROM numbers(10) AS a, numbers(11) AS b, numbers(12) AS c SETTINGS max_block_size = 0" 2>&1 | grep -q "Sanity check: 'max_block_size' cannot be 0. Set to default value" && echo "OK" || echo "FAIL" +$CLICKHOUSE_CLIENT -q "SELECT count(*) FROM numbers(10) AS a, numbers(11) AS b, numbers(12) AS c SETTINGS max_block_size = 0" 2>&1 | + [ "$(grep -c "Sanity check: 'max_block_size' cannot be 0. Set to default value")" -gt 0 ] && echo "OK" || echo "FAIL" diff --git a/tests/queries/0_stateless/03150_dynamic_type_mv_insert.reference b/tests/queries/0_stateless/03150_dynamic_type_mv_insert.reference index 0b76d30953e..370e6352657 100644 --- a/tests/queries/0_stateless/03150_dynamic_type_mv_insert.reference +++ b/tests/queries/0_stateless/03150_dynamic_type_mv_insert.reference @@ -1,35 +1,35 @@ 1 2024-01-01 Date 2 1704056400 Decimal(18, 3) -3 1 String -4 2 String +3 1 Float32 +4 2 Float64 1 2024-01-01 Date 1 2024-01-01 Date 2 1704056400 Decimal(18, 3) 2 1704056400 Decimal(18, 3) -3 1 String -3 1 String -4 2 String -4 2 String - -1 2024-01-01 String -1 2024-01-01 String -2 1704056400 String -2 1704056400 String -3 1 String -3 1 String -4 2 String -4 2 String +3 1 Float32 +3 1 Float32 +4 2 Float64 +4 2 Float64 1 2024-01-01 Date -1 2024-01-01 String -1 2024-01-01 String +1 2024-01-01 Date 2 1704056400 Decimal(18, 3) -2 1704056400 String -2 1704056400 String -3 1 String -3 1 String -3 1 String -4 2 String -4 2 String -4 2 String +2 1704056400 Decimal(18, 3) +3 1 Float32 +3 1 Float32 +4 2 Float64 +4 2 Float64 + +1 2024-01-01 Date +1 2024-01-01 Date +1 2024-01-01 Date +2 1704056400 Decimal(18, 3) +2 1704056400 Decimal(18, 3) +2 1704056400 Decimal(18, 3) +3 1 Float32 +3 1 Float32 +3 1 Float32 +4 2 Float64 +4 2 Float64 +4 2 Float64 diff --git a/tests/queries/0_stateless/03150_dynamic_type_mv_insert.sql b/tests/queries/0_stateless/03150_dynamic_type_mv_insert.sql index ad5ea9512c6..71d5dd4abd1 100644 --- a/tests/queries/0_stateless/03150_dynamic_type_mv_insert.sql +++ b/tests/queries/0_stateless/03150_dynamic_type_mv_insert.sql @@ -1,5 +1,6 @@ SET allow_experimental_dynamic_type=1; +DROP TABLE IF EXISTS null_table; CREATE TABLE null_table ( n1 UInt8, @@ -7,9 +8,11 @@ CREATE TABLE null_table ) ENGINE = Null; +DROP VIEW IF EXISTS dummy_rmv; CREATE MATERIALIZED VIEW dummy_rmv TO to_table AS SELECT * FROM null_table; +DROP TABLE IF EXISTS to_table; CREATE TABLE to_table ( n1 UInt8, @@ -32,3 +35,7 @@ select ''; ALTER TABLE to_table MODIFY COLUMN n2 Dynamic(max_types=10); INSERT INTO null_table ( n1, n2 ) VALUES (1, '2024-01-01'), (2, toDateTime64('2024-01-01', 3, 'Asia/Istanbul')), (3, toFloat32(1)), (4, toFloat64(2)); SELECT *, dynamicType(n2) FROM to_table ORDER BY ALL; + +DROP TABLE null_table; +DROP VIEW dummy_rmv; +DROP TABLE to_table; diff --git a/tests/queries/0_stateless/03150_infer_type_variant.reference b/tests/queries/0_stateless/03150_infer_type_variant.reference new file mode 100644 index 00000000000..a43fa1e1227 --- /dev/null +++ b/tests/queries/0_stateless/03150_infer_type_variant.reference @@ -0,0 +1,31 @@ + ┏━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ arr ┃ toTypeName(arr) ┃ + ┡━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +1. │ ['1','Hello',(32)] │ Array(Variant(String, Tuple( + a Nullable(Int64)))) │ + └────────────────────┴──────────────────────────────────────────────────────┘ + ┏━━━━━━━┳━━━━━━━━━━━━━━━━━━┓ + ┃ x ┃ toTypeName(x) ┃ + ┡━━━━━━━╇━━━━━━━━━━━━━━━━━━┩ +1. │ 42 │ Nullable(String) │ + ├───────┼──────────────────┤ +2. │ Hello │ Nullable(String) │ + └───────┴──────────────────┘ + ┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ x ┃ toTypeName(x) ┃ + ┡━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +1. │ [1,2,3] │ Variant(Array(Nullable(Int64)), Tuple( + a Nullable(Int64))) │ + ├─────────┼───────────────────────────────────────────────────────────────┤ +2. │ (42) │ Variant(Array(Nullable(Int64)), Tuple( + a Nullable(Int64))) │ + └─────────┴───────────────────────────────────────────────────────────────┘ + ┏━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ c1 ┃ toTypeName(c1) ┃ c2 ┃ toTypeName(c2) ┃ + ┡━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +1. │ 1 │ Nullable(Int64) │ Hello World! │ Variant(Array(Nullable(Int64)), String) │ + ├────┼─────────────────┼──────────────┼─────────────────────────────────────────┤ +2. │ 2 │ Nullable(Int64) │ [1,2,3] │ Variant(Array(Nullable(Int64)), String) │ + ├────┼─────────────────┼──────────────┼─────────────────────────────────────────┤ +3. │ 3 │ Nullable(Int64) │ 2020-01-01 │ Variant(Array(Nullable(Int64)), String) │ + └────┴─────────────────┴──────────────┴─────────────────────────────────────────┘ diff --git a/tests/queries/0_stateless/03150_infer_type_variant.sql b/tests/queries/0_stateless/03150_infer_type_variant.sql new file mode 100644 index 00000000000..45126ccd471 --- /dev/null +++ b/tests/queries/0_stateless/03150_infer_type_variant.sql @@ -0,0 +1,5 @@ +SET input_format_try_infer_variants=1; +SELECT arr, toTypeName(arr) FROM format('JSONEachRow', '{"arr" : [1, "Hello", {"a" : 32}]}') FORMAT Pretty; +SELECT x, toTypeName(x) FROM format('JSONEachRow', '{"x" : 42}, {"x" : "Hello"}') FORMAT Pretty; +SELECT x, toTypeName(x) FROM format('JSONEachRow', '{"x" : [1, 2, 3]}, {"x" : {"a" : 42}}') FORMAT Pretty; +SELECT c1, toTypeName(c1), c2, toTypeName(c2) FROM format('CSV', '1,Hello World!\n2,"[1,2,3]"\n3,"2020-01-01"\n') FORMAT Pretty; \ No newline at end of file diff --git a/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.reference b/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.reference index d96fbf658d8..2d3b2f118f6 100644 --- a/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.reference +++ b/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.reference @@ -1,26 +1,26 @@ -1 2024-01-01 Date -2 1704056400 String -3 1 String -4 2 String +1 2024-01-01 Date false +2 1704056400 Decimal(18, 3) false +3 1 Float32 true +4 2 Float64 true -1 2024-01-01 Date -1 2024-01-01 Date -2 1704056400 Decimal(18, 3) -2 1704056400 String -3 1 Float32 -3 1 String -4 2 Float64 -4 2 String +1 2024-01-01 Date false +1 2024-01-01 Date false +2 1704056400 Decimal(18, 3) false +2 1704056400 Decimal(18, 3) false +3 1 Float32 false +3 1 Float32 false +4 2 Float64 false +4 2 Float64 false -1 2024-01-01 String -1 2024-01-01 String -1 2024-01-01 String -2 1704056400 String -2 1704056400 String -2 1704056400 String -3 1 String -3 1 String -3 1 String -4 2 String -4 2 String -4 2 String +1 2024-01-01 Date true +1 2024-01-01 Date true +1 2024-01-01 Date true +2 1704056400 Decimal(18, 3) true +2 1704056400 Decimal(18, 3) true +2 1704056400 Decimal(18, 3) true +3 1 Float32 true +3 1 Float32 true +3 1 Float32 true +4 2 Float64 true +4 2 Float64 true +4 2 Float64 true diff --git a/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.sql b/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.sql index 632f3504fdb..e476d34a1db 100644 --- a/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.sql +++ b/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.sql @@ -11,16 +11,16 @@ CREATE TABLE to_table ENGINE = MergeTree ORDER BY n1; INSERT INTO to_table ( n1, n2 ) VALUES (1, '2024-01-01'), (2, toDateTime64('2024-01-01', 3, 'Asia/Istanbul')), (3, toFloat32(1)), (4, toFloat64(2)); -SELECT *, dynamicType(n2) FROM to_table ORDER BY ALL; +SELECT *, dynamicType(n2), isDynamicElementInSharedData(n2) FROM to_table ORDER BY ALL; select ''; ALTER TABLE to_table MODIFY COLUMN n2 Dynamic(max_types=5); INSERT INTO to_table ( n1, n2 ) VALUES (1, '2024-01-01'), (2, toDateTime64('2024-01-01', 3, 'Asia/Istanbul')), (3, toFloat32(1)), (4, toFloat64(2)); -SELECT *, dynamicType(n2) FROM to_table ORDER BY ALL; +SELECT *, dynamicType(n2), isDynamicElementInSharedData(n2) FROM to_table ORDER BY ALL; select ''; -ALTER TABLE to_table MODIFY COLUMN n2 Dynamic(max_types=1); +ALTER TABLE to_table MODIFY COLUMN n2 Dynamic(max_types=0); INSERT INTO to_table ( n1, n2 ) VALUES (1, '2024-01-01'), (2, toDateTime64('2024-01-01', 3, 'Asia/Istanbul')), (3, toFloat32(1)), (4, toFloat64(2)); -SELECT *, dynamicType(n2) FROM to_table ORDER BY ALL; +SELECT *, dynamicType(n2), isDynamicElementInSharedData(n2) FROM to_table ORDER BY ALL; ALTER TABLE to_table MODIFY COLUMN n2 Dynamic(max_types=500); -- { serverError UNEXPECTED_AST_STRUCTURE } diff --git a/tests/queries/0_stateless/03152_dynamic_type_simple.reference b/tests/queries/0_stateless/03152_dynamic_type_simple.reference index 5f243209ff3..e508bdd1990 100644 --- a/tests/queries/0_stateless/03152_dynamic_type_simple.reference +++ b/tests/queries/0_stateless/03152_dynamic_type_simple.reference @@ -7,7 +7,7 @@ string2 String \N None 42 Int64 42 Int64 string String -string String [1, 2] String +string String [1,2] Array(Int64) [1,2] Array(Int64) \N None ┌─d────────────────────────┬─dynamicType(d)─┬─d.Int64─┬─d.String─┬─────d.Date─┬─d.Float64─┬──────────d.DateTime─┬─d.Array(Int64)─┬─d.Array(String)──────────┐ 1. │ 42 │ Int64 │ 42 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ [] │ diff --git a/tests/queries/0_stateless/03152_dynamic_type_simple.sql b/tests/queries/0_stateless/03152_dynamic_type_simple.sql index fd5328faf15..ed24b213b1c 100644 --- a/tests/queries/0_stateless/03152_dynamic_type_simple.sql +++ b/tests/queries/0_stateless/03152_dynamic_type_simple.sql @@ -1,14 +1,17 @@ SET allow_experimental_dynamic_type=1; +DROP TABLE IF EXISTS test_max_types; CREATE TABLE test_max_types (d Dynamic(max_types=5)) ENGINE = Memory; INSERT INTO test_max_types VALUES ('string1'), (42), (3.14), ([1, 2]), (toDate('2021-01-01')), ('string2'); SELECT d, dynamicType(d) FROM test_max_types; SELECT ''; +DROP TABLE IF EXISTS test_nested_dynamic; CREATE TABLE test_nested_dynamic (d1 Dynamic, d2 Dynamic(max_types=2)) ENGINE = Memory; INSERT INTO test_nested_dynamic VALUES (NULL, 42), (42, 'string'), ('string', [1, 2]), ([1, 2], NULL); SELECT d1, dynamicType(d1), d2, dynamicType(d2) FROM test_nested_dynamic; +DROP TABLE IF EXISTS test_rapid_schema; CREATE TABLE test_rapid_schema (d Dynamic) ENGINE = Memory; INSERT INTO test_rapid_schema VALUES (42), ('string1'), (toDate('2021-01-01')), ([1, 2, 3]), (3.14), ('string2'), (toDateTime('2021-01-01 12:00:00')), (['array', 'of', 'strings']), (NULL), (toFloat64(42.42)); @@ -27,3 +30,8 @@ FROM FROM numbers(10000) ) ); + +DROP TABLE test_max_types; +DROP TABLE test_nested_dynamic; +DROP TABLE test_rapid_schema; + diff --git a/tests/queries/0_stateless/03153_dynamic_type_empty.sql b/tests/queries/0_stateless/03153_dynamic_type_empty.sql index 8e942fe6f6e..3a0c98e63ee 100644 --- a/tests/queries/0_stateless/03153_dynamic_type_empty.sql +++ b/tests/queries/0_stateless/03153_dynamic_type_empty.sql @@ -1,5 +1,7 @@ SET allow_experimental_dynamic_type=1; +DROP TABLE IF EXISTS test_null_empty; CREATE TABLE test_null_empty (d Dynamic) ENGINE = Memory; INSERT INTO test_null_empty VALUES ([]), ([1]), ([]), (['1']), ([]), (()),((1)), (()), (('1')), (()), ({}), ({1:2}), ({}), ({'1':'2'}), ({}); SELECT d, dynamicType(d) FROM test_null_empty; +DROP TABLE test_null_empty; diff --git a/tests/queries/0_stateless/03155_test_move_to_prewhere.sh b/tests/queries/0_stateless/03155_test_move_to_prewhere.sh index b6980b3a23a..f33a6b3ef27 100755 --- a/tests/queries/0_stateless/03155_test_move_to_prewhere.sh +++ b/tests/queries/0_stateless/03155_test_move_to_prewhere.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -nq " +${CLICKHOUSE_CLIENT} -q " CREATE TABLE event_envoy ( timestamp_interval DateTime CODEC(DoubleDelta), @@ -18,7 +18,7 @@ ${CLICKHOUSE_CLIENT} -nq " INSERT INTO event_envoy SELECT now() - number, 'us-east-1', 'ch_super_fast' FROM numbers_mt(1e5); " -${CLICKHOUSE_CLIENT} -nq " +${CLICKHOUSE_CLIENT} -q " CREATE TABLE event_envoy_remote ( timestamp_interval DateTime CODEC(DoubleDelta), diff --git a/tests/queries/0_stateless/03156_default_multiquery_split.sh b/tests/queries/0_stateless/03156_default_multiquery_split.sh index 8ba2f46b786..d849fb5a162 100755 --- a/tests/queries/0_stateless/03156_default_multiquery_split.sh +++ b/tests/queries/0_stateless/03156_default_multiquery_split.sh @@ -53,6 +53,6 @@ SELECT * FROM TEST2 ORDER BY value; DROP TABLE TEST1; DROP TABLE TEST2; EOF -$CLICKHOUSE_CLIENT -m -n < "$SQL_FILE_NAME" +$CLICKHOUSE_CLIENT -m < "$SQL_FILE_NAME" rm "$SQL_FILE_NAME" diff --git a/tests/queries/0_stateless/03157_dynamic_type_json.reference b/tests/queries/0_stateless/03157_dynamic_type_json.reference index 38bca12bb95..14e851bdbc7 100644 --- a/tests/queries/0_stateless/03157_dynamic_type_json.reference +++ b/tests/queries/0_stateless/03157_dynamic_type_json.reference @@ -1,5 +1,5 @@ -1 (((((((((('deep_value')))))))))) -2 (((((((((('deep_array_value')))))))))) +1 {"level1":{"level2":{"level3":{"level4":{"level5":{"level6":{"level7":{"level8":{"level9":{"level10":"deep_value"}}}}}}}}}} +2 {"level1":{"level2":{"level3":{"level4":{"level5":{"level6":{"level7":{"level8":{"level9":{"level10":"deep_array_value"}}}}}}}}}} -(((((((((('deep_value')))))))))) Tuple(level1 Tuple(level2 Tuple(level3 Tuple(level4 Tuple(level5 Tuple(level6 Tuple(level7 Tuple(level8 Tuple(level9 Tuple(level10 String)))))))))) -(((((((((('deep_array_value')))))))))) Tuple(level1 Tuple(level2 Tuple(level3 Tuple(level4 Tuple(level5 Tuple(level6 Tuple(level7 Tuple(level8 Tuple(level9 Tuple(level10 String)))))))))) +{"level1":{"level2":{"level3":{"level4":{"level5":{"level6":{"level7":{"level8":{"level9":{"level10":"deep_value"}}}}}}}}}} JSON +{"level1":{"level2":{"level3":{"level4":{"level5":{"level6":{"level7":{"level8":{"level9":{"level10":"deep_array_value"}}}}}}}}}} JSON diff --git a/tests/queries/0_stateless/03157_dynamic_type_json.sql b/tests/queries/0_stateless/03157_dynamic_type_json.sql index cb1a5987104..91af7942718 100644 --- a/tests/queries/0_stateless/03157_dynamic_type_json.sql +++ b/tests/queries/0_stateless/03157_dynamic_type_json.sql @@ -1,7 +1,8 @@ SET allow_experimental_dynamic_type=1; -SET allow_experimental_object_type=1; +SET allow_experimental_json_type=1; SET allow_experimental_variant_type=1; +DROP TABLE IF EXISTS test_deep_nested_json; CREATE TABLE test_deep_nested_json (i UInt16, d JSON) ENGINE = Memory; INSERT INTO test_deep_nested_json VALUES (1, '{"level1": {"level2": {"level3": {"level4": {"level5": {"level6": {"level7": {"level8": {"level9": {"level10": "deep_value"}}}}}}}}}}'); @@ -11,3 +12,4 @@ SELECT * FROM test_deep_nested_json ORDER BY i; SELECT ''; SELECT d::Dynamic d1, dynamicType(d1) FROM test_deep_nested_json ORDER BY i; +DROP TABLE test_deep_nested_json; diff --git a/tests/queries/0_stateless/03158_dynamic_type_from_variant.sql b/tests/queries/0_stateless/03158_dynamic_type_from_variant.sql index 20a9e17a148..a18f985f217 100644 --- a/tests/queries/0_stateless/03158_dynamic_type_from_variant.sql +++ b/tests/queries/0_stateless/03158_dynamic_type_from_variant.sql @@ -1,5 +1,4 @@ SET allow_experimental_dynamic_type=1; -SET allow_experimental_object_type=1; SET allow_experimental_variant_type=1; CREATE TABLE test_variable (v Variant(String, UInt32, IPv6, Bool, DateTime64)) ENGINE = Memory; diff --git a/tests/queries/0_stateless/03159_dynamic_type_all_types.sql b/tests/queries/0_stateless/03159_dynamic_type_all_types.sql index d302205ca23..28b679e2214 100644 --- a/tests/queries/0_stateless/03159_dynamic_type_all_types.sql +++ b/tests/queries/0_stateless/03159_dynamic_type_all_types.sql @@ -1,12 +1,11 @@ -- Tags: no-random-settings SET allow_experimental_dynamic_type=1; -SET allow_experimental_object_type=1; SET allow_experimental_variant_type=1; SET allow_suspicious_low_cardinality_types=1; -CREATE TABLE t (d Dynamic(max_types=255)) ENGINE = Memory; +CREATE TABLE t (d Dynamic(max_types=254)) ENGINE = Memory; -- Integer types: signed and unsigned integers (UInt8, UInt16, UInt32, UInt64, UInt128, UInt256, Int8, Int16, Int32, Int64, Int128, Int256) INSERT INTO t VALUES (-128::Int8), (-127::Int8), (-1::Int8), (0::Int8), (1::Int8), (126::Int8), (127::Int8); INSERT INTO t VALUES (-128::Int8), (-127::Int8), (-1::Int8), (0::Int8), (1::Int8), (126::Int8), (127::Int8); @@ -84,7 +83,7 @@ INSERT INTO t VALUES ([(1, (2, ['aa', 'bb']), [(3, 'cc'), (4, 'dd')]), (5, (6, [ SELECT dynamicType(d), d FROM t ORDER BY substring(dynamicType(d),1,1), length(dynamicType(d)), d; -CREATE TABLE t2 (d Dynamic(max_types=255)) ENGINE = Memory; +CREATE TABLE t2 (d Dynamic(max_types=254)) ENGINE = Memory; INSERT INTO t2 SELECT * FROM t; SELECT ''; diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference index c5a6cbab0bc..8edf541c2a0 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference @@ -1,2 +1,90 @@ -1231 John 33 +compact part +testing throw default mode +-- { echoOn } + +ALTER TABLE users_compact MODIFY SETTING lightweight_mutation_projection_mode = 'throw'; +DELETE FROM users_compact WHERE uid = 1231; -- { serverError SUPPORT_IS_DISABLED } +SELECT 'testing drop mode'; +testing drop mode +ALTER TABLE users_compact MODIFY SETTING lightweight_mutation_projection_mode = 'drop'; +DELETE FROM users_compact WHERE uid = 1231; +SELECT * FROM users_compact ORDER BY uid; +SYSTEM FLUSH LOGS; +-- all_1_1_0_2 +SELECT + name +FROM system.parts +WHERE (database = currentDatabase()) AND (`table` = 'users_compact') AND (active = 1); +all_1_1_0_2 +-- expecting no projection +SELECT + name, parent_name +FROM system.projection_parts +WHERE (database = currentDatabase()) AND (`table` = 'users_compact') AND (active = 1); +SELECT 'testing rebuild mode'; +testing rebuild mode +INSERT INTO users_compact VALUES (6666, 'Ksenia', 48), (8888, 'Alice', 50); +ALTER TABLE users_compact MODIFY SETTING lightweight_mutation_projection_mode = 'rebuild'; +DELETE FROM users_compact WHERE uid = 6666; +SELECT * FROM users_compact ORDER BY uid; 8888 Alice 50 +SYSTEM FLUSH LOGS; +-- all_1_1_0_4, all_3_3_0_4 +SELECT + name +FROM system.parts +WHERE (database = currentDatabase()) AND (`table` = 'users_compact') AND (active = 1); +all_1_1_0_4 +all_3_3_0_4 +-- expecting projection p1, p2 +SELECT + name, parent_name +FROM system.projection_parts +WHERE (database = currentDatabase()) AND (`table` = 'users_compact') AND (active = 1) AND parent_name like 'all_3_3%'; +p1 all_3_3_0_4 +p2 all_3_3_0_4 +wide part +testing throw default mode +-- { echoOn } + +ALTER TABLE users_wide MODIFY SETTING lightweight_mutation_projection_mode = 'throw'; +DELETE FROM users_wide WHERE uid = 1231; -- { serverError SUPPORT_IS_DISABLED } +SELECT 'testing drop mode'; +testing drop mode +ALTER TABLE users_wide MODIFY SETTING lightweight_mutation_projection_mode = 'drop'; +DELETE FROM users_wide WHERE uid = 1231; +SELECT * FROM users_wide ORDER BY uid; +SYSTEM FLUSH LOGS; +-- all_1_1_0_2 +SELECT + name +FROM system.parts +WHERE (database = currentDatabase()) AND (`table` = 'users_wide') AND (active = 1); +all_1_1_0_2 +-- expecting no projection +SELECT + name, parent_name +FROM system.projection_parts +WHERE (database = currentDatabase()) AND (`table` = 'users_wide') AND (active = 1); +SELECT 'testing rebuild mode'; +testing rebuild mode +INSERT INTO users_wide VALUES (6666, 'Ksenia', 48), (8888, 'Alice', 50); +ALTER TABLE users_wide MODIFY SETTING lightweight_mutation_projection_mode = 'rebuild'; +DELETE FROM users_wide WHERE uid = 6666; +SELECT * FROM users_wide ORDER BY uid; +8888 Alice 50 +SYSTEM FLUSH LOGS; +-- all_1_1_0_4, all_3_3_0_4 +SELECT + name +FROM system.parts +WHERE (database = currentDatabase()) AND (`table` = 'users_wide') AND (active = 1); +all_1_1_0_4 +all_3_3_0_4 +-- expecting projection p1, p2 +SELECT + name, parent_name +FROM system.projection_parts +WHERE (database = currentDatabase()) AND (`table` = 'users_wide') AND (active = 1) AND parent_name like 'all_3_3%'; +p1 all_3_3_0_4 +p2 all_3_3_0_4 diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql index b189388e356..da6427cbf22 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ -1,31 +1,145 @@ +-- For cloud version, should also consider min_bytes_for_full_part_storage since packed storage exists, +-- but for less redundancy, just let CI test the parameter. -DROP TABLE IF EXISTS users; +SET lightweight_deletes_sync = 2, alter_sync = 2; -CREATE TABLE users ( +DROP TABLE IF EXISTS users_compact; + + +SELECT 'compact part'; + +CREATE TABLE users_compact ( uid Int16, name String, age Int16, projection p1 (select count(), age group by age), projection p2 (select age, name group by age, name) -) ENGINE = MergeTree order by uid; +) ENGINE = MergeTree order by uid +SETTINGS min_bytes_for_wide_part = 10485760; -INSERT INTO users VALUES (1231, 'John', 33); -INSERT INTO users VALUES (6666, 'Ksenia', 48); -INSERT INTO users VALUES (8888, 'Alice', 50); +INSERT INTO users_compact VALUES (1231, 'John', 33); -DELETE FROM users WHERE 1; -- { serverError NOT_IMPLEMENTED } +SELECT 'testing throw default mode'; -DELETE FROM users WHERE uid = 8888 SETTINGS lightweight_mutation_projection_mode = 'throw'; -- { serverError NOT_IMPLEMENTED } +-- { echoOn } -DELETE FROM users WHERE uid = 6666 SETTINGS lightweight_mutation_projection_mode = 'drop'; +ALTER TABLE users_compact MODIFY SETTING lightweight_mutation_projection_mode = 'throw'; + +DELETE FROM users_compact WHERE uid = 1231; -- { serverError SUPPORT_IS_DISABLED } + +SELECT 'testing drop mode'; +ALTER TABLE users_compact MODIFY SETTING lightweight_mutation_projection_mode = 'drop'; + +DELETE FROM users_compact WHERE uid = 1231; + +SELECT * FROM users_compact ORDER BY uid; + +SYSTEM FLUSH LOGS; + +-- all_1_1_0_2 +SELECT + name +FROM system.parts +WHERE (database = currentDatabase()) AND (`table` = 'users_compact') AND (active = 1); -- expecting no projection SELECT - name, - `table` + name, parent_name FROM system.projection_parts -WHERE (database = currentDatabase()) AND (`table` = 'users'); +WHERE (database = currentDatabase()) AND (`table` = 'users_compact') AND (active = 1); -SELECT * FROM users ORDER BY uid; +SELECT 'testing rebuild mode'; +INSERT INTO users_compact VALUES (6666, 'Ksenia', 48), (8888, 'Alice', 50); -DROP TABLE users; +ALTER TABLE users_compact MODIFY SETTING lightweight_mutation_projection_mode = 'rebuild'; + +DELETE FROM users_compact WHERE uid = 6666; + +SELECT * FROM users_compact ORDER BY uid; + +SYSTEM FLUSH LOGS; + +-- all_1_1_0_4, all_3_3_0_4 +SELECT + name +FROM system.parts +WHERE (database = currentDatabase()) AND (`table` = 'users_compact') AND (active = 1); + +-- expecting projection p1, p2 +SELECT + name, parent_name +FROM system.projection_parts +WHERE (database = currentDatabase()) AND (`table` = 'users_compact') AND (active = 1) AND parent_name like 'all_3_3%'; + +-- { echoOff } + +DROP TABLE users_compact; + + +SELECT 'wide part'; +CREATE TABLE users_wide ( + uid Int16, + name String, + age Int16, + projection p1 (select count(), age group by age), + projection p2 (select age, name group by age, name) +) ENGINE = MergeTree order by uid +SETTINGS min_bytes_for_wide_part = 0; + +INSERT INTO users_wide VALUES (1231, 'John', 33); + +SELECT 'testing throw default mode'; + +-- { echoOn } + +ALTER TABLE users_wide MODIFY SETTING lightweight_mutation_projection_mode = 'throw'; + +DELETE FROM users_wide WHERE uid = 1231; -- { serverError SUPPORT_IS_DISABLED } + +SELECT 'testing drop mode'; +ALTER TABLE users_wide MODIFY SETTING lightweight_mutation_projection_mode = 'drop'; + +DELETE FROM users_wide WHERE uid = 1231; + +SELECT * FROM users_wide ORDER BY uid; + +SYSTEM FLUSH LOGS; + +-- all_1_1_0_2 +SELECT + name +FROM system.parts +WHERE (database = currentDatabase()) AND (`table` = 'users_wide') AND (active = 1); + +-- expecting no projection +SELECT + name, parent_name +FROM system.projection_parts +WHERE (database = currentDatabase()) AND (`table` = 'users_wide') AND (active = 1); + +SELECT 'testing rebuild mode'; +INSERT INTO users_wide VALUES (6666, 'Ksenia', 48), (8888, 'Alice', 50); + +ALTER TABLE users_wide MODIFY SETTING lightweight_mutation_projection_mode = 'rebuild'; + +DELETE FROM users_wide WHERE uid = 6666; + +SELECT * FROM users_wide ORDER BY uid; + +SYSTEM FLUSH LOGS; + +-- all_1_1_0_4, all_3_3_0_4 +SELECT + name +FROM system.parts +WHERE (database = currentDatabase()) AND (`table` = 'users_wide') AND (active = 1); + +-- expecting projection p1, p2 +SELECT + name, parent_name +FROM system.projection_parts +WHERE (database = currentDatabase()) AND (`table` = 'users_wide') AND (active = 1) AND parent_name like 'all_3_3%'; + +-- { echoOff } + +DROP TABLE users_wide; diff --git a/tests/queries/0_stateless/03164_selects_with_pk_usage_profile_event.sh b/tests/queries/0_stateless/03164_selects_with_pk_usage_profile_event.sh index 29d4c877909..75efc3f057a 100755 --- a/tests/queries/0_stateless/03164_selects_with_pk_usage_profile_event.sh +++ b/tests/queries/0_stateless/03164_selects_with_pk_usage_profile_event.sh @@ -33,7 +33,7 @@ $CLICKHOUSE_CLIENT -q " query_id="$(random_str 10)" $CLICKHOUSE_CLIENT --query_id "$query_id" -q " SELECT count(*) FROM table_$table_id FORMAT Null;" -$CLICKHOUSE_CLIENT -mn -q " +$CLICKHOUSE_CLIENT -m -q " SYSTEM FLUSH LOGS; SELECT ProfileEvents['SelectQueriesWithPrimaryKeyUsage'] AS selects_with_pk_usage @@ -50,7 +50,7 @@ $CLICKHOUSE_CLIENT -mn -q " query_id="$(random_str 10)" $CLICKHOUSE_CLIENT --query_id "$query_id" -q " SELECT count(*) FROM table_$table_id WHERE col2 >= 50000 FORMAT Null;" -$CLICKHOUSE_CLIENT -mn -q " +$CLICKHOUSE_CLIENT -m -q " SYSTEM FLUSH LOGS; SELECT ProfileEvents['SelectQueriesWithPrimaryKeyUsage'] AS selects_with_pk_usage @@ -67,7 +67,7 @@ $CLICKHOUSE_CLIENT -mn -q " query_id="$(random_str 10)" $CLICKHOUSE_CLIENT --query_id "$query_id" -q " SELECT count(*) FROM table_$table_id WHERE pk >= 50000 FORMAT Null;" -$CLICKHOUSE_CLIENT -mn -q " +$CLICKHOUSE_CLIENT -m -q " SYSTEM FLUSH LOGS; SELECT ProfileEvents['SelectQueriesWithPrimaryKeyUsage'] AS selects_with_pk_usage @@ -84,7 +84,7 @@ $CLICKHOUSE_CLIENT -mn -q " query_id="$(random_str 10)" $CLICKHOUSE_CLIENT --query_id "$query_id" -q " SELECT count(*) FROM table_$table_id WHERE col1 >= 50000 FORMAT Null;" -$CLICKHOUSE_CLIENT -mn -q " +$CLICKHOUSE_CLIENT -m -q " SYSTEM FLUSH LOGS; SELECT ProfileEvents['SelectQueriesWithPrimaryKeyUsage'] AS selects_with_pk_usage diff --git a/tests/queries/0_stateless/03169_time_virtual_column.sh b/tests/queries/0_stateless/03169_time_virtual_column.sh index fef1de8c6f2..b289f39accb 100755 --- a/tests/queries/0_stateless/03169_time_virtual_column.sh +++ b/tests/queries/0_stateless/03169_time_virtual_column.sh @@ -6,7 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) echo "1,2" > $CLICKHOUSE_TEST_UNIQUE_NAME.csv sleep 1 -$CLICKHOUSE_LOCAL -nm -q " +$CLICKHOUSE_LOCAL -m -q " select _size, (dateDiff('millisecond', _time, now()) < 600000 AND dateDiff('millisecond', _time, now()) > 0) from file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv'); " rm $CLICKHOUSE_TEST_UNIQUE_NAME.csv diff --git a/tests/queries/0_stateless/03170_ecs_crash.reference b/tests/queries/0_stateless/03170_ecs_crash.reference new file mode 100644 index 00000000000..acd7c60768b --- /dev/null +++ b/tests/queries/0_stateless/03170_ecs_crash.reference @@ -0,0 +1,4 @@ +1 2 3 +4 5 6 +7 8 9 +0 0 0 diff --git a/tests/queries/0_stateless/03170_ecs_crash.sh b/tests/queries/0_stateless/03170_ecs_crash.sh new file mode 100755 index 00000000000..fa6870c4cf2 --- /dev/null +++ b/tests/queries/0_stateless/03170_ecs_crash.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# Previous versions crashed in attempt to use this authentication method (regardless of whether it was able to authenticate): +AWS_CONTAINER_CREDENTIALS_FULL_URI=http://localhost:1338/latest/meta-data/container/security-credentials $CLICKHOUSE_LOCAL -q "select * from s3('http://localhost:11111/test/a.tsv')" diff --git a/tests/queries/0_stateless/03172_dynamic_binary_serialization.sh b/tests/queries/0_stateless/03172_dynamic_binary_serialization.sh index 9b57e5c8718..b9bab2bd70b 100755 --- a/tests/queries/0_stateless/03172_dynamic_binary_serialization.sh +++ b/tests/queries/0_stateless/03172_dynamic_binary_serialization.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh $CLICKHOUSE_CLIENT -q "drop table if exists test" -$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 -q "create table test (id UInt64, d Dynamic(max_types=255)) engine=Memory" +$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 -q "create table test (id UInt64, d Dynamic(max_types=254)) engine=Memory" $CLICKHOUSE_CLIENT -q "insert into test select 0, NULL" $CLICKHOUSE_CLIENT -q "insert into test select 1, materialize(42)::UInt8" @@ -58,6 +58,6 @@ $CLICKHOUSE_CLIENT -q "insert into test select 47, materialize([[(20, 20), (50, $CLICKHOUSE_CLIENT -q "insert into test select 48, materialize([[[(0, 0), (10, 0), (10, 10), (0, 10)]], [[(20, 20), (50, 20), (50, 50), (20, 50)],[(30, 30), (50, 50), (50, 30)]]])::MultiPolygon" $CLICKHOUSE_CLIENT -q "insert into test select 49, materialize([map(42, tuple(1, [tuple(2, map(1, 2))]))])" -$CLICKHOUSE_CLIENT -q "select * from test format RowBinary" | $CLICKHOUSE_LOCAL --allow_experimental_dynamic_type=1 --input-format RowBinary --structure 'id UInt64, d Dynamic(max_types=255)' -q "select d, dynamicType(d) from table order by id" +$CLICKHOUSE_CLIENT -q "select * from test format RowBinary" | $CLICKHOUSE_LOCAL --allow_experimental_dynamic_type=1 --input-format RowBinary --structure 'id UInt64, d Dynamic(max_types=254)' -q "select d, dynamicType(d) from table order by id" $CLICKHOUSE_CLIENT -q "drop table test" diff --git a/tests/queries/0_stateless/03172_system_detached_tables.sh b/tests/queries/0_stateless/03172_system_detached_tables.sh index 47775abcc45..60e913b62a8 100755 --- a/tests/queries/0_stateless/03172_system_detached_tables.sh +++ b/tests/queries/0_stateless/03172_system_detached_tables.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) DATABASE_ATOMIC="${CLICKHOUSE_DATABASE}_atomic" DATABASE_LAZY="${CLICKHOUSE_DATABASE}_lazy" -$CLICKHOUSE_CLIENT --multiquery " +$CLICKHOUSE_CLIENT " SELECT 'database atomic tests'; DROP DATABASE IF EXISTS ${DATABASE_ATOMIC}; @@ -36,7 +36,7 @@ DROP DATABASE ${DATABASE_ATOMIC} SYNC; " -$CLICKHOUSE_CLIENT --multiquery " +$CLICKHOUSE_CLIENT " SELECT '-----------------------'; SELECT 'database lazy tests'; diff --git a/tests/queries/0_stateless/03173_parallel_replicas_join_bug.sh b/tests/queries/0_stateless/03173_parallel_replicas_join_bug.sh index 7c567c0f58f..d2be9899f86 100755 --- a/tests/queries/0_stateless/03173_parallel_replicas_join_bug.sh +++ b/tests/queries/0_stateless/03173_parallel_replicas_join_bug.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -nq " +$CLICKHOUSE_CLIENT -q " CREATE TABLE ids (id UUID, whatever String) Engine=MergeTree ORDER BY tuple(); INSERT INTO ids VALUES ('a1451105-722e-4fe7-bfaa-65ad2ae249c2', 'whatever'); @@ -16,7 +16,7 @@ $CLICKHOUSE_CLIENT -nq " INSERT INTO data2 VALUES ('a1451105-722e-4fe7-bfaa-65ad2ae249c2', '2000-01-02', 'CREATED'); " -$CLICKHOUSE_CLIENT -nq " +$CLICKHOUSE_CLIENT -q " SET enable_analyzer = 1, cluster_for_parallel_replicas = 'parallel_replicas', max_parallel_replicas = 10, allow_experimental_parallel_reading_from_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, max_threads = 1; SELECT diff --git a/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.reference b/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.reference index 1ba147f9627..7de0804e0f2 100644 --- a/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.reference +++ b/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.reference @@ -110,5 +110,5 @@ true Bool [{42:(1,[(2,{1:2})])}] Dynamic [{42:(1,[(2,{1:2})])}] Dynamic(max_types=10) [{42:(1,[(2,{1:2})])}] Dynamic(max_types=10) -[{42:(1,[(2,{1:2})])}] Dynamic(max_types=255) -[{42:(1,[(2,{1:2})])}] Dynamic(max_types=255) +[{42:(1,[(2,{1:2})])}] Dynamic(max_types=254) +[{42:(1,[(2,{1:2})])}] Dynamic(max_types=254) diff --git a/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.sh b/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.sh index 0c585d36348..1e674a29072 100755 --- a/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.sh +++ b/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.sh @@ -6,8 +6,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) function test { - $CLICKHOUSE_LOCAL --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --output_format_binary_encode_types_in_binary_format=1 -q "select $1 as value format RowBinaryWithNamesAndTypes" | $CLICKHOUSE_LOCAL --input-format RowBinaryWithNamesAndTypes --input_format_binary_decode_types_in_binary_format=1 -q "select value, toTypeName(value) from table" - $CLICKHOUSE_LOCAL --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --output_format_native_encode_types_in_binary_format=1 -q "select $1 as value format Native" | $CLICKHOUSE_LOCAL --input-format Native --input_format_native_decode_types_in_binary_format=1 -q "select value, toTypeName(value) from table" + $CLICKHOUSE_LOCAL --stacktrace --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --output_format_binary_encode_types_in_binary_format=1 -q "select $1 as value format RowBinaryWithNamesAndTypes" | $CLICKHOUSE_LOCAL --input-format RowBinaryWithNamesAndTypes --input_format_binary_decode_types_in_binary_format=1 -q "select value, toTypeName(value) from table" + $CLICKHOUSE_LOCAL --stacktrace --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --output_format_native_encode_types_in_binary_format=1 -q "select $1 as value format Native" | $CLICKHOUSE_LOCAL --input-format Native --input_format_native_decode_types_in_binary_format=1 -q "select value, toTypeName(value) from table" } test "materialize(42)::UInt8" @@ -66,4 +66,4 @@ test "materialize([map(42, tuple(1, [tuple(2, map(1, 2))]))])" test "materialize(42::UInt32)::Variant(UInt32, String, Tuple(a UInt32, b Array(Map(String, String))))" test "materialize([map(42, tuple(1, [tuple(2, map(1, 2))]))])::Dynamic" test "materialize([map(42, tuple(1, [tuple(2, map(1, 2))]))])::Dynamic(max_types=10)" -test "materialize([map(42, tuple(1, [tuple(2, map(1, 2))]))])::Dynamic(max_types=255)" +test "materialize([map(42, tuple(1, [tuple(2, map(1, 2))]))])::Dynamic(max_types=254)" diff --git a/tests/queries/0_stateless/03174_exact_rows_before_aggregation.reference b/tests/queries/0_stateless/03174_exact_rows_before_aggregation.reference new file mode 100644 index 00000000000..5f7fedbbcb3 --- /dev/null +++ b/tests/queries/0_stateless/03174_exact_rows_before_aggregation.reference @@ -0,0 +1,548 @@ +{ + "meta": + [ + { + "name": "i", + "type": "Int32" + } + ], + + "data": + [ + [0], + [1], + [2], + [3], + [4], + [5], + [6], + [7], + [8], + [9] + ], + + "rows": 10, + + "rows_before_aggregation": 10 +} + + + + + + i + Int32 + + + + + + 0 + + + 1 + + + 2 + + + 3 + + + 4 + + + 5 + + + 6 + + + 7 + + + 8 + + + 9 + + + 10 + 10 + +{ + "meta": + [ + { + "name": "i", + "type": "Int32" + } + ], + + "data": + [ + [10], + [11], + [12] + ], + + "rows": 3, + + "rows_before_aggregation": 3 +} +{ + "meta": + [ + { + "name": "i", + "type": "Int32" + } + ], + + "data": + [ + [0], + [1], + [2], + [3], + [4], + [5], + [6], + [7], + [8], + [9], + [10], + [11], + [12], + [13], + [14], + [15], + [16], + [17], + [18], + [19] + ], + + "rows": 20, + + "rows_before_aggregation": 20 +} +{ + "meta": + [ + { + "name": "max(i)", + "type": "Int32" + } + ], + + "data": + [ + [19] + ], + + "rows": 1, + + "rows_before_limit_at_least": 1, + + "rows_before_aggregation": 20 +} +{ + "meta": + [ + { + "name": "i", + "type": "Int32" + } + ], + + "data": + [ + [0], + [1], + [2], + [3], + [4], + [5], + [6], + [7], + [8], + [9], + [10], + [11], + [12], + [13], + [14], + [15], + [16], + [17], + [18], + [19], + [20], + [21], + [22], + [23], + [24], + [25], + [26], + [27], + [28], + [29] + ], + + "rows": 30, + + "rows_before_limit_at_least": 60, + + "rows_before_aggregation": 60 +} +{ + "meta": + [ + { + "name": "i", + "type": "Int32" + } + ], + + "data": + [ + [0], + [1], + [2], + [3], + [4], + [5], + [6], + [7], + [8], + [9], + [10], + [11], + [12], + [13], + [14], + [15], + [16], + [17], + [18], + [19] + ], + + "rows": 20, + + "rows_before_limit_at_least": 40, + + "rows_before_aggregation": 40 +} +{ + "meta": + [ + { + "name": "i", + "type": "Int32" + } + ], + + "data": + [ + [0], + [1], + [2], + [3], + [4], + [5], + [6], + [7], + [8], + [9], + [10], + [11], + [12], + [13], + [14], + [15], + [16], + [17], + [18], + [19], + [20], + [21], + [22], + [23], + [24], + [25], + [26], + [27], + [28], + [29] + ], + + "rows": 30, + + "rows_before_limit_at_least": 30, + + "rows_before_aggregation": 60 +} +{ + "meta": + [ + { + "name": "i", + "type": "Int32" + } + ], + + "data": + [ + [0], + [1], + [2], + [3], + [4], + [5], + [6], + [7], + [8], + [9], + [10], + [11], + [12], + [13], + [14], + [15], + [16], + [17], + [18], + [19] + ], + + "rows": 20, + + "rows_before_limit_at_least": 20, + + "rows_before_aggregation": 40 +} +{ + "meta": + [ + { + "name": "max(i)", + "type": "Int32" + } + ], + + "data": + [ + [19] + ], + + "rows": 1, + + "rows_before_limit_at_least": 1, + + "rows_before_aggregation": 40 +} +{ + "meta": + [ + { + "name": "i", + "type": "Int32" + } + ], + + "data": + [ + [0], + [1], + [2], + [3], + [4], + [5], + [6], + [7], + [8], + [9] + ], + + "rows": 10, + + "rows_before_limit_at_least": 10, + + "rows_before_aggregation": 20 +} +{ + "meta": + [ + { + "name": "i", + "type": "Int32" + } + ], + + "data": + [ + [0], + [1], + [2], + [3], + [4], + [5], + [6], + [7], + [8], + [9] + ], + + "rows": 10, + + "rows_before_limit_at_least": 10, + + "rows_before_aggregation": 20 +} +{ + "meta": + [ + { + "name": "i", + "type": "Int32" + } + ], + + "data": + [ + [0], + [1], + [2], + [3], + [4], + [5], + [6], + [7], + [8], + [9] + ], + + "rows": 10, + + "rows_before_aggregation": 10 +} +{ + "meta": + [ + { + "name": "max(i)", + "type": "Int32" + } + ], + + "data": + [ + [19] + ], + + "rows": 1, + + "rows_before_limit_at_least": 1, + + "rows_before_aggregation": 20 +} +{ + "meta": + [ + { + "name": "i", + "type": "Int32" + } + ], + + "data": + [ + [0], + [1], + [2], + [3], + [4], + [5], + [6], + [7], + [8], + [9], + [10], + [11], + [12], + [13], + [14], + [15], + [16], + [17], + [18], + [19], + [20], + [21], + [22], + [23], + [24], + [25], + [26], + [27], + [28], + [29] + ], + + "rows": 30, + + "rows_before_limit_at_least": 60, + + "rows_before_aggregation": 60 +} +{ + "meta": + [ + { + "name": "i", + "type": "Int32" + } + ], + + "data": + [ + [0], + [1], + [2], + [3], + [4], + [5], + [6], + [7], + [8], + [9], + [10], + [11], + [12], + [13], + [14], + [15], + [16], + [17], + [18], + [19], + [20], + [21], + [22], + [23], + [24], + [25], + [26], + [27], + [28], + [29] + ], + + "rows": 30, + + "rows_before_limit_at_least": 30, + + "rows_before_aggregation": 60 +} diff --git a/tests/queries/0_stateless/03174_exact_rows_before_aggregation.sql b/tests/queries/0_stateless/03174_exact_rows_before_aggregation.sql new file mode 100644 index 00000000000..f9fd4ef5a7b --- /dev/null +++ b/tests/queries/0_stateless/03174_exact_rows_before_aggregation.sql @@ -0,0 +1,47 @@ +-- Tags: no-parallel, no-random-merge-tree-settings + +set rows_before_aggregation = 1, exact_rows_before_limit = 1, output_format_write_statistics = 0, max_block_size = 100; + +drop table if exists test; + +create table test (i int) engine MergeTree order by tuple(); +insert into test select arrayJoin(range(10000)); + +select * from test where i < 10 group by i order by i FORMAT JSONCompact; +select * from test where i < 10 group by i order by i FORMAT XML; +select * from test group by i having i in (10, 11, 12) order by i FORMAT JSONCompact; +select * from test where i < 20 group by i order by i FORMAT JSONCompact; +select max(i) from test where i < 20 limit 1 FORMAT JSONCompact; + +set prefer_localhost_replica = 0; +select * from cluster(test_cluster_two_shards, currentDatabase(), test) where i < 30 group by i order by i FORMAT JSONCompact; +select * from cluster(test_cluster_two_shards, currentDatabase(), test) where i < 20 group by i order by i FORMAT JSONCompact; + +set prefer_localhost_replica = 1; +select * from cluster(test_cluster_two_shards, currentDatabase(), test) where i < 30 group by i order by i FORMAT JSONCompact; +select * from cluster(test_cluster_two_shards, currentDatabase(), test) where i < 20 group by i order by i FORMAT JSONCompact; + +select max(i) from cluster(test_cluster_two_shards, currentDatabase(), test) where i < 20 FORMAT JSONCompact; + +select * from (select * from cluster(test_cluster_two_shards, currentDatabase(), test) where i < 10) group by i order by i limit 10 FORMAT JSONCompact; +set prefer_localhost_replica = 0; +select * from (select * from cluster(test_cluster_two_shards, currentDatabase(), test) where i < 10) group by i order by i limit 10 FORMAT JSONCompact; + +drop table if exists test; + +create table test (i int) engine MergeTree order by i; + +insert into test select arrayJoin(range(10000)); + +set optimize_aggregation_in_order=1; + +select * from test where i < 10 group by i order by i FORMAT JSONCompact; +select max(i) from test where i < 20 limit 1 FORMAT JSONCompact; + +set prefer_localhost_replica = 0; +select * from cluster(test_cluster_two_shards, currentDatabase(), test) where i < 30 group by i order by i FORMAT JSONCompact; + +set prefer_localhost_replica = 1; +select * from cluster(test_cluster_two_shards, currentDatabase(), test) where i < 30 group by i order by i FORMAT JSONCompact; + +drop table if exists test; diff --git a/tests/queries/0_stateless/03198_settings_in_csv_tsv_schema_cache.sh b/tests/queries/0_stateless/03198_settings_in_csv_tsv_schema_cache.sh index ce53f467823..583257d8fd3 100755 --- a/tests/queries/0_stateless/03198_settings_in_csv_tsv_schema_cache.sh +++ b/tests/queries/0_stateless/03198_settings_in_csv_tsv_schema_cache.sh @@ -5,25 +5,25 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh echo -e 'a,b,c\n1,2,3' > $CLICKHOUSE_TEST_UNIQUE_NAME.csv -$CLICKHOUSE_LOCAL -nm -q " +$CLICKHOUSE_LOCAL -m -q " DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv') SETTINGS input_format_csv_skip_first_lines=1; DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv') SETTINGS input_format_csv_skip_first_lines=0; SELECT count() from system.schema_inference_cache where format = 'CSV' and additional_format_info like '%skip_first_lines%';" echo -e 'a,b,c\n"1",2,3' > $CLICKHOUSE_TEST_UNIQUE_NAME.csv -$CLICKHOUSE_LOCAL -nm -q " +$CLICKHOUSE_LOCAL -m -q " DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv') SETTINGS input_format_csv_try_infer_numbers_from_strings=1; DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv') SETTINGS input_format_csv_try_infer_numbers_from_strings=0; SELECT count() from system.schema_inference_cache where format = 'CSV' and additional_format_info like '%try_infer_numbers_from_strings%';" echo -e 'a,b,c\n"(1,2,3)",2,3' > $CLICKHOUSE_TEST_UNIQUE_NAME.csv -$CLICKHOUSE_LOCAL -nm -q " +$CLICKHOUSE_LOCAL -m -q " DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv') SETTINGS input_format_csv_try_infer_strings_from_quoted_tuples=1; DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv') SETTINGS input_format_csv_try_infer_strings_from_quoted_tuples=0; SELECT count() from system.schema_inference_cache where format = 'CSV' and additional_format_info like '%try_infer_strings_from_quoted_tuples%';" echo -e 'a\tb\tc\n1\t2\t3' > $CLICKHOUSE_TEST_UNIQUE_NAME.tsv -$CLICKHOUSE_LOCAL -nm -q " +$CLICKHOUSE_LOCAL -m -q " DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.tsv') SETTINGS input_format_tsv_skip_first_lines=1; DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.tsv') SETTINGS input_format_tsv_skip_first_lines=0; SELECT count() from system.schema_inference_cache where format = 'TSV' and additional_format_info like '%skip_first_lines%';" diff --git a/tests/queries/0_stateless/03198_unload_primary_key_outdated.sh b/tests/queries/0_stateless/03198_unload_primary_key_outdated.sh index 4f217935123..c759cc34425 100755 --- a/tests/queries/0_stateless/03198_unload_primary_key_outdated.sh +++ b/tests/queries/0_stateless/03198_unload_primary_key_outdated.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -n " +$CLICKHOUSE_CLIENT " DROP TABLE IF EXISTS t_unload_primary_key; CREATE TABLE t_unload_primary_key (a UInt64, b UInt64) @@ -26,7 +26,7 @@ for _ in {1..100}; do sleep 0.3 done -$CLICKHOUSE_CLIENT -n " +$CLICKHOUSE_CLIENT " SELECT name, active, primary_key_bytes_in_memory FROM system.parts WHERE database = '$CLICKHOUSE_DATABASE' AND table = 't_unload_primary_key' ORDER BY name; DROP TABLE IF EXISTS t_unload_primary_key; " diff --git a/tests/queries/0_stateless/03199_dictionary_table_access.sh b/tests/queries/0_stateless/03199_dictionary_table_access.sh index 952b466b5da..14f017c7fbc 100755 --- a/tests/queries/0_stateless/03199_dictionary_table_access.sh +++ b/tests/queries/0_stateless/03199_dictionary_table_access.sh @@ -8,7 +8,7 @@ username="user_${CLICKHOUSE_TEST_UNIQUE_NAME}" dictname="dict_${CLICKHOUSE_TEST_UNIQUE_NAME}" dicttablename="dict_table_${CLICKHOUSE_TEST_UNIQUE_NAME}" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " CREATE DICTIONARY IF NOT EXISTS ${dictname} ( id UInt64, @@ -26,15 +26,15 @@ ${CLICKHOUSE_CLIENT} -nm --query " SELECT * FROM ${dicttablename}; " -$CLICKHOUSE_CLIENT -nm --user="${username}" --query " +$CLICKHOUSE_CLIENT -m --user="${username}" --query " SELECT * FROM ${dictname}; " 2>&1 | grep -o ACCESS_DENIED | uniq -$CLICKHOUSE_CLIENT -nm --user="${username}" --query " +$CLICKHOUSE_CLIENT -m --user="${username}" --query " SELECT * FROM ${dicttablename}; " 2>&1 | grep -o ACCESS_DENIED | uniq -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " DROP TABLE IF EXISTS ${dicttablename} SYNC; DROP DICTIONARY IF EXISTS ${dictname}; DROP USER IF EXISTS ${username}; diff --git a/tests/queries/0_stateless/03199_json_extract_dynamic.reference b/tests/queries/0_stateless/03199_json_extract_dynamic.reference index 759b7763cd1..955106946ea 100644 --- a/tests/queries/0_stateless/03199_json_extract_dynamic.reference +++ b/tests/queries/0_stateless/03199_json_extract_dynamic.reference @@ -12,7 +12,7 @@ Hello String [1,2,3] Array(Nullable(Int64)) ['str1','str2','str3'] Array(Nullable(String)) [[[1],[2,3,4]],[[5,6],[7]]] Array(Array(Array(Nullable(Int64)))) -['2020-01-01 00:00:00.000000000','2020-01-01 00:00:00.000000000'] Array(Nullable(DateTime64(9))) +['2020-01-01 00:00:00','2020-01-01 00:00:00'] Array(Nullable(DateTime)) ['2020-01-01','2020-01-01 date'] Array(Nullable(String)) ['2020-01-01','2020-01-01 00:00:00','str'] Array(Nullable(String)) ['2020-01-01','2020-01-01 00:00:00','42'] Array(Nullable(String)) diff --git a/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.reference b/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.reference index 6d2c1334d6e..8d2470dea44 100644 --- a/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.reference +++ b/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.reference @@ -1,10 +1,10 @@ -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +1 +2 +3 +4 +0 +1 +2 +3 +4 diff --git a/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.sql b/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.sql index 25f3bb0f4c8..939b49e1599 100644 --- a/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.sql +++ b/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.sql @@ -1,6 +1,8 @@ set allow_experimental_dynamic_type=1; +drop table if exists test; create table test (d Dynamic) engine=Memory; insert into table test select * from numbers(5); -alter table test modify column d Dynamic(max_types=1); +alter table test modify column d Dynamic(max_types=0); select d.UInt64 from test settings enable_analyzer=1; select d.UInt64 from test settings enable_analyzer=0; +drop table test; diff --git a/tests/queries/0_stateless/03203_count_with_non_deterministic_function.reference b/tests/queries/0_stateless/03203_count_with_non_deterministic_function.reference new file mode 100644 index 00000000000..6ed281c757a --- /dev/null +++ b/tests/queries/0_stateless/03203_count_with_non_deterministic_function.reference @@ -0,0 +1,2 @@ +1 +1 diff --git a/tests/queries/0_stateless/03203_count_with_non_deterministic_function.sql b/tests/queries/0_stateless/03203_count_with_non_deterministic_function.sql new file mode 100644 index 00000000000..bb3269da597 --- /dev/null +++ b/tests/queries/0_stateless/03203_count_with_non_deterministic_function.sql @@ -0,0 +1,4 @@ +CREATE TABLE t (p UInt8, x UInt64) Engine = MergeTree PARTITION BY p ORDER BY x; +INSERT INTO t SELECT 0, number FROM numbers(10) SETTINGS max_block_size = 100; +SELECT count() FROM t WHERE p = 0 AND rowNumberInAllBlocks() = 1 SETTINGS allow_experimental_analyzer = 0; +SELECT count() FROM t WHERE p = 0 AND rowNumberInAllBlocks() = 1 SETTINGS allow_experimental_analyzer = 1; diff --git a/tests/queries/0_stateless/03203_drop_detached_partition_all.reference b/tests/queries/0_stateless/03203_drop_detached_partition_all.reference new file mode 100644 index 00000000000..c0f52d1d898 --- /dev/null +++ b/tests/queries/0_stateless/03203_drop_detached_partition_all.reference @@ -0,0 +1,5 @@ +1 1 +2 2 +3 3 +3 +0 diff --git a/tests/queries/0_stateless/03203_drop_detached_partition_all.sql b/tests/queries/0_stateless/03203_drop_detached_partition_all.sql new file mode 100644 index 00000000000..e29eb4ae36b --- /dev/null +++ b/tests/queries/0_stateless/03203_drop_detached_partition_all.sql @@ -0,0 +1,8 @@ +DROP TABLE IF EXISTS t_03203; +CREATE TABLE t_03203 (p UInt64, v UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY v; +INSERT INTO t_03203 VALUES (1, 1), (2, 2), (3, 3); +SELECT * FROM t_03203 ORDER BY p, v; +ALTER TABLE t_03203 DETACH PARTITION ALL; +SELECT count() FROM system.detached_parts WHERE database = currentDatabase() AND table = 't_03203'; +ALTER TABLE t_03203 DROP DETACHED PARTITION ALL SETTINGS allow_drop_detached = 1; +SELECT count() FROM system.detached_parts WHERE database = currentDatabase() AND table = 't_03203'; diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.reference b/tests/queries/0_stateless/03203_hive_style_partitioning.reference new file mode 100644 index 00000000000..0fbc1fb556e --- /dev/null +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.reference @@ -0,0 +1,115 @@ +TESTING THE FILE HIVE PARTITIONING + last Elizabeth +Frank Elizabeth +Moreno Elizabeth +Guzman Elizabeth +Stephens Elizabeth +Franklin Elizabeth +Gibson Elizabeth +Greer Elizabeth +Delgado Elizabeth +Cross Elizabeth +first last Elizabeth +Jorge Frank Elizabeth +Hunter Moreno Elizabeth +Esther Guzman Elizabeth +Dennis Stephens Elizabeth +Nettie Franklin Elizabeth +Stanley Gibson Elizabeth +Eugenia Greer Elizabeth +Jeffery Delgado Elizabeth +Clara Cross Elizabeth + last Elizabeth +Frank Elizabeth +Moreno Elizabeth +Guzman Elizabeth +Stephens Elizabeth +Franklin Elizabeth +Gibson Elizabeth +Greer Elizabeth +Delgado Elizabeth +Cross Elizabeth +42 2020-01-01 +[1,2,3] 42.42 +Array(Int64) LowCardinality(Float64) +101 +2070 +2070 +b +1 +1 +TESTING THE URL PARTITIONING + last Elizabeth +Frank Elizabeth +Moreno Elizabeth +Guzman Elizabeth +Stephens Elizabeth +Franklin Elizabeth +Gibson Elizabeth +Greer Elizabeth +Delgado Elizabeth +Cross Elizabeth +first last Elizabeth +Jorge Frank Elizabeth +Hunter Moreno Elizabeth +Esther Guzman Elizabeth +Dennis Stephens Elizabeth +Nettie Franklin Elizabeth +Stanley Gibson Elizabeth +Eugenia Greer Elizabeth +Jeffery Delgado Elizabeth +Clara Cross Elizabeth +1 +TESTING THE S3 PARTITIONING + last Elizabeth +Frank Elizabeth +Moreno Elizabeth +Guzman Elizabeth +Stephens Elizabeth +Franklin Elizabeth +Gibson Elizabeth +Greer Elizabeth +Delgado Elizabeth +Cross Elizabeth +first last Elizabeth +Jorge Frank Elizabeth +Hunter Moreno Elizabeth +Esther Guzman Elizabeth +Dennis Stephens Elizabeth +Nettie Franklin Elizabeth +Stanley Gibson Elizabeth +Eugenia Greer Elizabeth +Jeffery Delgado Elizabeth +Clara Cross Elizabeth + last Elizabeth +Frank Elizabeth +Moreno Elizabeth +Guzman Elizabeth +Stephens Elizabeth +Franklin Elizabeth +Gibson Elizabeth +Greer Elizabeth +Delgado Elizabeth +Cross Elizabeth +OK +TESTING THE S3CLUSTER PARTITIONING + last Elizabeth +Frank Elizabeth +Moreno Elizabeth +Guzman Elizabeth +Stephens Elizabeth +Franklin Elizabeth +Gibson Elizabeth +Greer Elizabeth +Delgado Elizabeth +Cross Elizabeth + last Elizabeth +Frank Elizabeth +Moreno Elizabeth +Guzman Elizabeth +Stephens Elizabeth +Franklin Elizabeth +Gibson Elizabeth +Greer Elizabeth +Delgado Elizabeth +Cross Elizabeth diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.sh b/tests/queries/0_stateless/03203_hive_style_partitioning.sh new file mode 100755 index 00000000000..60e8a6e9faa --- /dev/null +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.sh @@ -0,0 +1,88 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE FILE HIVE PARTITIONING'" + + +$CLICKHOUSE_LOCAL -n -q """ +set use_hive_partitioning = 1; + +SELECT *, column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; + +SELECT *, non_existing_column FROM file('$CURDIR/data_hive/partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10; +SELECT *, column0 FROM file('$CURDIR/data_hive/partitioning/column0=*/sample.parquet') WHERE column0 = 'Elizabeth' LIMIT 10; + +SELECT number, date FROM file('$CURDIR/data_hive/partitioning/number=42/date=2020-01-01/sample.parquet') LIMIT 1; +SELECT array, float FROM file('$CURDIR/data_hive/partitioning/array=[1,2,3]/float=42.42/sample.parquet') LIMIT 1; +SELECT toTypeName(array), toTypeName(float) FROM file('$CURDIR/data_hive/partitioning/array=[1,2,3]/float=42.42/sample.parquet') LIMIT 1; +SELECT count(*) FROM file('$CURDIR/data_hive/partitioning/number=42/date=2020-01-01/sample.parquet') WHERE number = 42; +""" + +$CLICKHOUSE_LOCAL -n -q """ +set use_hive_partitioning = 1; + +SELECT identifier FROM file('$CURDIR/data_hive/partitioning/identifier=*/email.csv') LIMIT 2; +SELECT a FROM file('$CURDIR/data_hive/partitioning/a=b/a=b/sample.parquet') LIMIT 1; +""" + +$CLICKHOUSE_LOCAL -n -q """ +set use_hive_partitioning = 1; + +SELECT *, column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth1/sample.parquet') LIMIT 10; +""" 2>&1 | grep -c "INCORRECT_DATA" + +$CLICKHOUSE_LOCAL -n -q """ +set use_hive_partitioning = 0; + +SELECT *, non_existing_column FROM file('$CURDIR/data_hive/partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10; +""" 2>&1 | grep -c "UNKNOWN_IDENTIFIER" + + +$CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE URL PARTITIONING'" + + +$CLICKHOUSE_LOCAL -n -q """ +set use_hive_partitioning = 1; + +SELECT *, column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; + +SELECT *, non_existing_column FROM url('http://localhost:11111/test/hive_partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10;""" + +$CLICKHOUSE_LOCAL -n -q """ +set use_hive_partitioning = 0; + +SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; +""" 2>&1 | grep -c "UNKNOWN_IDENTIFIER" + + +$CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE S3 PARTITIONING'" + + +$CLICKHOUSE_CLIENT -n -q """ +set use_hive_partitioning = 1; + +SELECT *, column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; + +SELECT *, non_existing_column FROM s3('http://localhost:11111/test/hive_partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10; +SELECT *, column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=*/sample.parquet') WHERE column0 = 'Elizabeth' LIMIT 10; +""" + +$CLICKHOUSE_CLIENT -n -q """ +set use_hive_partitioning = 0; + +SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; +""" 2>&1 | grep -F -q "UNKNOWN_IDENTIFIER" && echo "OK" || echo "FAIL"; + +$CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE S3CLUSTER PARTITIONING'" + +$CLICKHOUSE_CLIENT -n -q """ +set use_hive_partitioning = 1; + +SELECT *, column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; + +SELECT *, column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = 'Elizabeth' LIMIT 10; +""" diff --git a/tests/queries/0_stateless/03205_json_cast_from_string.reference b/tests/queries/0_stateless/03205_json_cast_from_string.reference new file mode 100644 index 00000000000..b9ac477eef4 --- /dev/null +++ b/tests/queries/0_stateless/03205_json_cast_from_string.reference @@ -0,0 +1,18 @@ +{} +{"a":"42","b":"Hello"} +{"a":{"b":{"c":{"d":"42"},"e":"43"},"f":"44"},"g":"44"} +{"a":{"b":{"c":{"d":true},"e":"43"},"f":"44"},"g":"44"} +{"a":{"b":{"e":"43"},"f":"44"},"g":"44"} +{"a":{"b":{"e":"43"},"f":"44"},"g":"44"} +{"a":{"f":"44"},"g":"44"} +{"g":"44"} +{"a":{"f":"44"},"g":"44"} +{"g":"44"} +{} +{"a":{"b":{"c":{"d":"42"},"e":"43"},"f":"44"},"g":"44"} {'a.b.c.d':'Int64','a.b.e':'Int64','a.f':'Int64','g':'Int64'} {'a.b.c.d':'Int64','a.b.e':'Int64','a.f':'Int64','g':'Int64'} {} +{"a":{"b":{"c":{"d":"42"},"e":"43"},"f":"44"},"g":"44"} {'a.b.c.d':'Int64','a.b.e':'Int64','a.f':'Int64','g':'Int64'} {'a.b.c.d':'Int64','a.b.e':'Int64'} {'a.f':'Int64','g':'Int64'} +{"a":{"b":{"c":{"d":"42"},"e":"43"},"f":"44"},"g":"44"} {'a.b.c.d':'Int64','a.b.e':'Int64','a.f':'Int64','g':'Int64'} {'a.b.c.d':'Int64'} {'a.b.e':'Int64','a.f':'Int64','g':'Int64'} +{"a":{"b":{"c":{"d":"42"},"e":"43"},"f":"44"},"g":"44"} {'a.b.c.d':'Int64','a.b.e':'Int64','a.f':'Int64','g':'Int64'} {} {'a.b.c.d':'Int64','a.b.e':'Int64','a.f':'Int64','g':'Int64'} +{"a":{"b":{"c":{"d":"42"},"e":"43"},"f":"44"},"g":"44"} {'a.b.c.d':'Int64','a.b.e':'Int64','a.f':'Int64','g':'Int64'} {'a.b.c.d':'Int64','a.b.e':'Int64'} {'a.f':'Int64','g':'Int64'} +{"a":{"b":{"c":{"d":"42"},"e":"43"},"f":"44"},"g":"44"} {'a.b.c.d':'Int64','a.b.e':'Int64','a.f':'Int64','g':'Int64'} {'a.b.c.d':'Int64'} {'a.b.e':'Int64','a.f':'Int64','g':'Int64'} +{"a":{"b":{"c":{"d":"42"},"e":"43"},"f":"44"},"g":"44"} {'a.b.c.d':'Int64','a.b.e':'Int64','a.f':'Int64','g':'Int64'} {} {'a.b.c.d':'Int64','a.b.e':'Int64','a.f':'Int64','g':'Int64'} diff --git a/tests/queries/0_stateless/03205_json_cast_from_string.sql b/tests/queries/0_stateless/03205_json_cast_from_string.sql new file mode 100644 index 00000000000..5ceee134c51 --- /dev/null +++ b/tests/queries/0_stateless/03205_json_cast_from_string.sql @@ -0,0 +1,22 @@ +-- Tags: no-fasttest +set allow_experimental_json_type=1; + +select materialize('{}')::JSON; +select materialize('{"a" : 42, "b" : "Hello"}')::JSON; +select materialize('{"a" : {"b" : {"c" : {"d" : 42}, "e" : 43}, "f" : 44}, "g" : 44}')::JSON; +select materialize('{"a" : {"b" : {"c" : {"d" : 42}, "e" : 43}, "f" : 44}, "g" : 44}')::JSON(a.b.c.d Bool); +select materialize('{"a" : {"b" : {"c" : {"d" : 42}, "e" : 43}, "f" : 44}, "g" : 44}')::JSON(SKIP a.b.c.d); +select materialize('{"a" : {"b" : {"c" : {"d" : 42}, "e" : 43}, "f" : 44}, "g" : 44}')::JSON(SKIP a.b.c); +select materialize('{"a" : {"b" : {"c" : {"d" : 42}, "e" : 43}, "f" : 44}, "g" : 44}')::JSON(SKIP a.b); +select materialize('{"a" : {"b" : {"c" : {"d" : 42}, "e" : 43}, "f" : 44}, "g" : 44}')::JSON(SKIP a); +select materialize('{"a" : {"b" : {"c" : {"d" : 42}, "e" : 43}, "f" : 44}, "g" : 44}')::JSON(SKIP REGEXP '.*a.*b'); +select materialize('{"a" : {"b" : {"c" : {"d" : 42}, "e" : 43}, "f" : 44}, "g" : 44}')::JSON(SKIP REGEXP '.*a.*'); +select materialize('{"a" : {"b" : {"c" : {"d" : 42}, "e" : 43}, "f" : 44}, "g" : 44}')::JSON(SKIP REGEXP '.*'); + +select materialize('{"a" : {"b" : {"c" : {"d" : 42}, "e" : 43}, "f" : 44}, "g" : 44}')::JSON as json, JSONAllPathsWithTypes(json), JSONDynamicPathsWithTypes(json), JSONSharedDataPathsWithTypes(json); +select materialize('{"a" : {"b" : {"c" : {"d" : 42}, "e" : 43}, "f" : 44}, "g" : 44}')::JSON(max_dynamic_paths = 2) as json, JSONAllPathsWithTypes(json), JSONDynamicPathsWithTypes(json), JSONSharedDataPathsWithTypes(json); +select materialize('{"a" : {"b" : {"c" : {"d" : 42}, "e" : 43}, "f" : 44}, "g" : 44}')::JSON(max_dynamic_paths = 1) as json, JSONAllPathsWithTypes(json), JSONDynamicPathsWithTypes(json), JSONSharedDataPathsWithTypes(json); +select materialize('{"a" : {"b" : {"c" : {"d" : 42}, "e" : 43}, "f" : 44}, "g" : 44}')::JSON(max_dynamic_paths = 0) as json, JSONAllPathsWithTypes(json), JSONDynamicPathsWithTypes(json), JSONSharedDataPathsWithTypes(json); +select materialize('{"a" : {"b" : {"c" : {"d" : 42}, "e" : 43}, "f" : 44}, "g" : 44}')::JSON(max_dynamic_paths = 2, max_dynamic_types=0) as json, JSONAllPathsWithTypes(json), JSONDynamicPathsWithTypes(json), JSONSharedDataPathsWithTypes(json); +select materialize('{"a" : {"b" : {"c" : {"d" : 42}, "e" : 43}, "f" : 44}, "g" : 44}')::JSON(max_dynamic_paths = 1, max_dynamic_types=0) as json, JSONAllPathsWithTypes(json), JSONDynamicPathsWithTypes(json), JSONSharedDataPathsWithTypes(json); +select materialize('{"a" : {"b" : {"c" : {"d" : 42}, "e" : 43}, "f" : 44}, "g" : 44}')::JSON(max_dynamic_paths = 0, max_dynamic_types=0) as json, JSONAllPathsWithTypes(json), JSONDynamicPathsWithTypes(json), JSONSharedDataPathsWithTypes(json); diff --git a/tests/queries/0_stateless/03205_json_syntax.reference b/tests/queries/0_stateless/03205_json_syntax.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03205_json_syntax.sql b/tests/queries/0_stateless/03205_json_syntax.sql new file mode 100644 index 00000000000..e3c88c81d0d --- /dev/null +++ b/tests/queries/0_stateless/03205_json_syntax.sql @@ -0,0 +1,40 @@ +-- Tags: no-fasttest + +set allow_experimental_json_type=1; +drop table if exists test; +create table test (json JSON) engine=Memory; +drop table test; +create table test (json JSON(max_dynamic_paths=10)) engine=Memory; +drop table test; +create table test (json JSON(max_dynamic_types=10)) engine=Memory; +drop table test; +create table test (json JSON(a UInt32)) engine=Memory; +drop table test; +create table test (json JSON(aaaaa UInt32)) engine=Memory; +drop table test; +create table test (json JSON(`a b c d` UInt32)) engine=Memory; +drop table test; +create table test (json JSON(a.b.c UInt32)) engine=Memory; +drop table test; +create table test (json JSON(aaaa.b.cccc UInt32)) engine=Memory; +drop table test; +create table test (json JSON(`some path`.`path some` UInt32)) engine=Memory; +drop table test; +create table test (json JSON(a.b.c Tuple(d UInt32, e UInt32))) engine=Memory; +drop table test; +create table test (json JSON(SKIP a)) engine=Memory; +drop table test; +create table test (json JSON(SKIP aaaa)) engine=Memory; +drop table test; +create table test (json JSON(SKIP `a b c d`)) engine=Memory; +drop table test; +create table test (json JSON(SKIP a.b.c)) engine=Memory; +drop table test; +create table test (json JSON(SKIP aaaa.b.cccc)) engine=Memory; +drop table test; +create table test (json JSON(SKIP `some path`.`path some`)) engine=Memory; +drop table test; +create table test (json JSON(SKIP REGEXP '.*a.*')) engine=Memory; +drop table test; +create table test (json JSON(max_dynamic_paths=10, max_dynamic_types=10, a.b.c UInt32, b.c.d String, SKIP g.d.a, SKIP o.g.a, SKIP REGEXP '.*u.*', SKIP REGEXP 'abc')) engine=Memory; +drop table test; diff --git a/tests/queries/0_stateless/03205_overlay.reference b/tests/queries/0_stateless/03205_overlay.reference new file mode 100644 index 00000000000..4be3baadaea --- /dev/null +++ b/tests/queries/0_stateless/03205_overlay.reference @@ -0,0 +1,68 @@ +Negative test of overlay +Test with 3 arguments and various combinations of const/non-const columns +Spark_SQL Spark_SQL和CH +Spark_SQL Spark_SQL和CH +Spark_SQL Spark_SQL和CH +Spark_SQL Spark_SQL和CH +Spark_SQL Spark_SQL和CH +Spark_SQL Spark_SQL和CH +Spark_SQL Spark_SQL和CH +Spark_SQL Spark_SQL和CH +Test with 4 arguments and various combinations of const/non-const columns +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Test with special offset values +-12 __ark SQL 之park SQL和CH +-11 __ark SQL S之ark SQL和CH +-10 __ark SQL Sp之rk SQL和CH +-9 __ark SQL Spa之k SQL和CH +-8 S__rk SQL Spar之 SQL和CH +-7 Sp__k SQL Spark之SQL和CH +-6 Spa__ SQL Spark 之QL和CH +-5 Spar__SQL Spark S之L和CH +-4 Spark__QL Spark SQ之和CH +-3 Spark __L Spark SQL之CH +-2 Spark S__ Spark SQL和之H +-1 Spark SQ__ Spark SQL和C之 +0 Spark SQL__ Spark SQL和CH之 +1 __ark SQL 之park SQL和CH +2 S__rk SQL S之ark SQL和CH +3 Sp__k SQL Sp之rk SQL和CH +4 Spa__ SQL Spa之k SQL和CH +5 Spar__SQL Spar之 SQL和CH +6 Spark__QL Spark之SQL和CH +7 Spark __L Spark 之QL和CH +8 Spark S__ Spark S之L和CH +9 Spark SQ__ Spark SQ之和CH +10 Spark SQL__ Spark SQL之CH +11 Spark SQL__ Spark SQL和之H +12 Spark SQL__ Spark SQL和C之 +13 Spark SQL__ Spark SQL和CH之 +Test with special length values +-1 Spark ANSI Spark ANSI H +0 Spark ANSI SQL Spark ANSI SQL和CH +1 Spark ANSI QL Spark ANSI QL和CH +2 Spark ANSI L Spark ANSI L和CH +3 Spark ANSI Spark ANSI 和CH +4 Spark ANSI Spark ANSI CH +5 Spark ANSI Spark ANSI H +6 Spark ANSI Spark ANSI +Test with special input and replace values +_ _ +Spark SQL Spark SQL和CH +ANSI ANSI +Spark SQL Spark SQL和CH diff --git a/tests/queries/0_stateless/03205_overlay.sql b/tests/queries/0_stateless/03205_overlay.sql new file mode 100644 index 00000000000..765b29f93ec --- /dev/null +++ b/tests/queries/0_stateless/03205_overlay.sql @@ -0,0 +1,47 @@ +SELECT 'Negative test of overlay'; +SELECT overlay('hello', 'world'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT overlay('hello', 'world', 2, 3, 'extra'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT overlay(123, 'world', 2, 3); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT overlay('hello', 456, 2, 3); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT overlay('hello', 'world', 'two', 3); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT overlay('hello', 'world', 2, 'three'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT 'Test with 3 arguments and various combinations of const/non-const columns'; +SELECT overlay('Spark SQL', '_', 6), overlayUTF8('Spark SQL和CH', '_', 6); +SELECT overlay(materialize('Spark SQL'), '_', 6), overlayUTF8(materialize('Spark SQL和CH'), '_', 6); +SELECT overlay('Spark SQL', materialize('_'), 6), overlayUTF8('Spark SQL和CH', materialize('_'), 6); +SELECT overlay('Spark SQL', '_', materialize(6)), overlayUTF8('Spark SQL和CH', '_', materialize(6)); +SELECT overlay(materialize('Spark SQL'), materialize('_'), 6), overlayUTF8(materialize('Spark SQL和CH'), materialize('_'), 6); +SELECT overlay(materialize('Spark SQL'), '_', materialize(6)), overlayUTF8(materialize('Spark SQL和CH'), '_', materialize(6)); +SELECT overlay('Spark SQL', materialize('_'), materialize(6)), overlayUTF8('Spark SQL和CH', materialize('_'), materialize(6)); +SELECT overlay(materialize('Spark SQL'), materialize('_'), materialize(6)), overlayUTF8(materialize('Spark SQL和CH'), materialize('_'), materialize(6)); + +SELECT 'Test with 4 arguments and various combinations of const/non-const columns'; +SELECT overlay('Spark SQL', 'ANSI ', 7, 0), overlayUTF8('Spark SQL和CH', 'ANSI ', 7, 0); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', 7, 0), overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', 7, 0); +SELECT overlay('Spark SQL', materialize('ANSI '), 7, 0), overlayUTF8('Spark SQL和CH', materialize('ANSI '), 7, 0); +SELECT overlay('Spark SQL', 'ANSI ', materialize(7), 0), overlayUTF8('Spark SQL和CH', 'ANSI ', materialize(7), 0); +SELECT overlay('Spark SQL', 'ANSI ', 7, materialize(0)), overlayUTF8('Spark SQL和CH', 'ANSI ', 7, materialize(0)); +SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), 7, 0), overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), 7, 0); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', materialize(7), 0), overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', materialize(7), 0); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', 7, materialize(0)), overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', 7, materialize(0)); +SELECT overlay('Spark SQL', materialize('ANSI '), materialize(7), 0), overlayUTF8('Spark SQL和CH', materialize('ANSI '), materialize(7), 0); +SELECT overlay('Spark SQL', materialize('ANSI '), 7, materialize(0)), overlayUTF8('Spark SQL和CH', materialize('ANSI '), 7, materialize(0)); +SELECT overlay('Spark SQL', 'ANSI ', materialize(7), materialize(0)), overlayUTF8('Spark SQL和CH', 'ANSI ', materialize(7), materialize(0)); +SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), materialize(7), 0), overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), materialize(7), 0); +SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), 7, materialize(0)), overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), 7, materialize(0)); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', materialize(7), materialize(0)), overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', materialize(7), materialize(0)); +SELECT overlay('Spark SQL', materialize('ANSI '), materialize(7), materialize(0)), overlayUTF8('Spark SQL和CH', materialize('ANSI '), materialize(7), materialize(0)); +SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), materialize(7), materialize(0)), overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), materialize(7), materialize(0)); + +SELECT 'Test with special offset values'; +WITH number - 12 AS offset SELECT offset, overlay('Spark SQL', '__', offset), overlayUTF8('Spark SQL和CH', '之', offset) FROM numbers(26); + +SELECT 'Test with special length values'; +WITH number - 1 AS length SELECT length, overlay('Spark SQL', 'ANSI ', 7, length), overlayUTF8('Spark SQL和CH', 'ANSI ', 7, length) FROM numbers(8); + +SELECT 'Test with special input and replace values'; +SELECT overlay('', '_', 6), overlayUTF8('', '_', 6); +SELECT overlay('Spark SQL', '', 6), overlayUTF8('Spark SQL和CH', '', 6); +SELECT overlay('', 'ANSI ', 7, 0), overlayUTF8('', 'ANSI ', 7, 0); +SELECT overlay('Spark SQL', '', 7, 0), overlayUTF8('Spark SQL和CH', '', 7, 0); diff --git a/tests/queries/0_stateless/03205_system_sync_replica_format.reference b/tests/queries/0_stateless/03205_system_sync_replica_format.reference new file mode 100644 index 00000000000..aad51dd90b0 --- /dev/null +++ b/tests/queries/0_stateless/03205_system_sync_replica_format.reference @@ -0,0 +1 @@ +SYSTEM SYNC REPLICA db.`table` LIGHTWEIGHT diff --git a/tests/queries/0_stateless/03205_system_sync_replica_format.sql b/tests/queries/0_stateless/03205_system_sync_replica_format.sql new file mode 100644 index 00000000000..329bce80afc --- /dev/null +++ b/tests/queries/0_stateless/03205_system_sync_replica_format.sql @@ -0,0 +1 @@ +SELECT formatQuery('SYSTEM SYNC REPLICA db.table LIGHTWEIGHT'); diff --git a/tests/queries/0_stateless/03206_json_parsing_and_formatting.reference b/tests/queries/0_stateless/03206_json_parsing_and_formatting.reference new file mode 100644 index 00000000000..75e55e0376d --- /dev/null +++ b/tests/queries/0_stateless/03206_json_parsing_and_formatting.reference @@ -0,0 +1,195 @@ +JSON with no arguments +{"a":{"b":{"c":"1","d":["0","1"]}},"b":"2020-01-01","c":"42","d":{"e":{"f":["s1","s2"]}}} +{"a":{"b":{"c":"2","d":["2","3"]}},"b":["1","2","3"],"d":{"e":{"g":"43"}}} +{"a":{"b":{"c":"3","d":["4","5"]}},"e":"Hello, World!"} +{"a":{"b":{"c":"4","d":["6","7"]}},"c":"43"} +{"a":{"b":{"c":"5","d":["8","9"]}},"b":["b1","b2"],"d":{"e":{"f":["s3","s4"],"g":"44","h":"2020-02-02 10:00:00"}}} +{'a.b.c':'Int64','a.b.d':'Array(Nullable(Int64))','b':'Date','c':'Int64','d.e.f':'Array(Nullable(String))'} {'a.b.c':'Int64','a.b.d':'Array(Nullable(Int64))','b':'Date','c':'Int64','d.e.f':'Array(Nullable(String))'} {} +{'a.b.c':'Int64','a.b.d':'Array(Nullable(Int64))','b':'Array(Nullable(Int64))','d.e.g':'Int64'} {'a.b.c':'Int64','a.b.d':'Array(Nullable(Int64))','b':'Array(Nullable(Int64))','d.e.g':'Int64'} {} +{'a.b.c':'Int64','a.b.d':'Array(Nullable(Int64))','e':'String'} {'a.b.c':'Int64','a.b.d':'Array(Nullable(Int64))','e':'String'} {} +{'a.b.c':'Int64','a.b.d':'Array(Nullable(Int64))','c':'Int64'} {'a.b.c':'Int64','a.b.d':'Array(Nullable(Int64))','c':'Int64'} {} +{'a.b.c':'Int64','a.b.d':'Array(Nullable(Int64))','b':'Array(Nullable(String))','d.e.f':'Array(Nullable(String))','d.e.g':'Int64','d.e.h':'DateTime'} {'a.b.c':'Int64','a.b.d':'Array(Nullable(Int64))','b':'Array(Nullable(String))','d.e.f':'Array(Nullable(String))','d.e.g':'Int64','d.e.h':'DateTime'} {} +1 2020-01-01 {"e":{"f":["s1","s2"]}} +2 [1,2,3] {"e":{"g":"43"}} +3 \N {} +4 \N {} +5 ['b1','b2'] {"e":{"f":["s3","s4"],"g":"44","h":"2020-02-02 10:00:00"}} +JSON(a.b Tuple(c UInt32, d Array(Bool)), SKIP d.e, SKIP c, SKIP REGEXP '.*h.*') +{"a":{"b":{"c":1,"d":[false,true]}},"b":"2020-01-01"} +{"a":{"b":{"c":2,"d":[true,true]}},"b":["1","2","3"]} +{"a":{"b":{"c":3,"d":[true,true]}},"e":"Hello, World!"} +{"a":{"b":{"c":4,"d":[true,true]}}} +{"a":{"b":{"c":5,"d":[true,true]}},"b":["b1","b2"]} +{'a.b':'Tuple(c UInt32, d Array(Bool))','b':'Date'} {'b':'Date'} {} +{'a.b':'Tuple(c UInt32, d Array(Bool))','b':'Array(Nullable(Int64))'} {'b':'Array(Nullable(Int64))'} {} +{'a.b':'Tuple(c UInt32, d Array(Bool))','e':'String'} {'e':'String'} {} +{'a.b':'Tuple(c UInt32, d Array(Bool))'} {} {} +{'a.b':'Tuple(c UInt32, d Array(Bool))','b':'Array(Nullable(String))'} {'b':'Array(Nullable(String))'} {} +JSON(a.b.c UInt32, max_dynamic_paths=2) +{"a":{"b":{"c":1,"d":["0","1"]}},"b":"2020-01-01","c":"42","d":{"e":{"f":["s1","s2"]}}} +{"a":{"b":{"c":2,"d":["2","3"]}},"b":["1","2","3"],"d":{"e":{"g":"43"}}} +{"a":{"b":{"c":3,"d":["4","5"]}},"e":"Hello, World!"} +{"a":{"b":{"c":4,"d":["6","7"]}},"c":"43"} +{"a":{"b":{"c":5,"d":["8","9"]}},"b":["b1","b2"],"d":{"e":{"f":["s3","s4"],"g":"44","h":"2020-02-02 10:00:00"}}} +{'a.b.c':'UInt32','a.b.d':'Array(Nullable(Int64))','b':'Date','c':'Int64','d.e.f':'Array(Nullable(String))'} {'a.b.d':'Array(Nullable(Int64))','b':'Date'} {'c':'Int64','d.e.f':'Array(Nullable(String))'} +{'a.b.c':'UInt32','a.b.d':'Array(Nullable(Int64))','b':'Array(Nullable(Int64))','d.e.g':'Int64'} {'a.b.d':'Array(Nullable(Int64))','b':'Array(Nullable(Int64))'} {'d.e.g':'Int64'} +{'a.b.c':'UInt32','a.b.d':'Array(Nullable(Int64))','e':'String'} {'a.b.d':'Array(Nullable(Int64))'} {'e':'String'} +{'a.b.c':'UInt32','a.b.d':'Array(Nullable(Int64))','c':'Int64'} {'a.b.d':'Array(Nullable(Int64))'} {'c':'Int64'} +{'a.b.c':'UInt32','a.b.d':'Array(Nullable(Int64))','b':'Array(Nullable(String))','d.e.f':'Array(Nullable(String))','d.e.g':'Int64','d.e.h':'DateTime'} {'a.b.d':'Array(Nullable(Int64))','b':'Array(Nullable(String))'} {'d.e.f':'Array(Nullable(String))','d.e.g':'Int64','d.e.h':'DateTime'} +JSON(a.b.c UInt32, max_dynamic_paths=0) +{"a":{"b":{"c":1,"d":["0","1"]}},"b":"2020-01-01","c":"42","d":{"e":{"f":["s1","s2"]}}} +{"a":{"b":{"c":2,"d":["2","3"]}},"b":["1","2","3"],"d":{"e":{"g":"43"}}} +{"a":{"b":{"c":3,"d":["4","5"]}},"e":"Hello, World!"} +{"a":{"b":{"c":4,"d":["6","7"]}},"c":"43"} +{"a":{"b":{"c":5,"d":["8","9"]}},"b":["b1","b2"],"d":{"e":{"f":["s3","s4"],"g":"44","h":"2020-02-02 10:00:00"}}} +{'a.b.c':'UInt32','a.b.d':'Array(Nullable(Int64))','b':'Date','c':'Int64','d.e.f':'Array(Nullable(String))'} {} {'a.b.d':'Array(Nullable(Int64))','b':'Date','c':'Int64','d.e.f':'Array(Nullable(String))'} +{'a.b.c':'UInt32','a.b.d':'Array(Nullable(Int64))','b':'Array(Nullable(Int64))','d.e.g':'Int64'} {} {'a.b.d':'Array(Nullable(Int64))','b':'Array(Nullable(Int64))','d.e.g':'Int64'} +{'a.b.c':'UInt32','a.b.d':'Array(Nullable(Int64))','e':'String'} {} {'a.b.d':'Array(Nullable(Int64))','e':'String'} +{'a.b.c':'UInt32','a.b.d':'Array(Nullable(Int64))','c':'Int64'} {} {'a.b.d':'Array(Nullable(Int64))','c':'Int64'} +{'a.b.c':'UInt32','a.b.d':'Array(Nullable(Int64))','b':'Array(Nullable(String))','d.e.f':'Array(Nullable(String))','d.e.g':'Int64','d.e.h':'DateTime'} {} {'a.b.d':'Array(Nullable(Int64))','b':'Array(Nullable(String))','d.e.f':'Array(Nullable(String))','d.e.g':'Int64','d.e.h':'DateTime'} +JSON(a.b.c UInt32, max_dynamic_types=1) +{"a":{"b":{"c":1,"d":["0","1"]}},"b":"2020-01-01","c":"42","d":{"e":{"f":["s1","s2"]}}} +{"a":{"b":{"c":2,"d":["2","3"]}},"b":["1","2","3"],"d":{"e":{"g":"43"}}} +{"a":{"b":{"c":3,"d":["4","5"]}},"e":"Hello, World!"} +{"a":{"b":{"c":4,"d":["6","7"]}},"c":"43"} +{"a":{"b":{"c":5,"d":["8","9"]}},"b":["b1","b2"],"d":{"e":{"f":["s3","s4"],"g":"44","h":"2020-02-02 10:00:00"}}} +{'a.b.c':'UInt32','a.b.d':'Array(Nullable(Int64))','b':'Date','c':'Int64','d.e.f':'Array(Nullable(String))'} {'a.b.d':'Array(Nullable(Int64))','b':'Date','c':'Int64','d.e.f':'Array(Nullable(String))'} {} +{'a.b.c':'UInt32','a.b.d':'Array(Nullable(Int64))','b':'Array(Nullable(Int64))','d.e.g':'Int64'} {'a.b.d':'Array(Nullable(Int64))','b':'Array(Nullable(Int64))','d.e.g':'Int64'} {} +{'a.b.c':'UInt32','a.b.d':'Array(Nullable(Int64))','e':'String'} {'a.b.d':'Array(Nullable(Int64))','e':'String'} {} +{'a.b.c':'UInt32','a.b.d':'Array(Nullable(Int64))','c':'Int64'} {'a.b.d':'Array(Nullable(Int64))','c':'Int64'} {} +{'a.b.c':'UInt32','a.b.d':'Array(Nullable(Int64))','b':'Array(Nullable(String))','d.e.f':'Array(Nullable(String))','d.e.g':'Int64','d.e.h':'DateTime'} {'a.b.d':'Array(Nullable(Int64))','b':'Array(Nullable(String))','d.e.f':'Array(Nullable(String))','d.e.g':'Int64','d.e.h':'DateTime'} {} +Test small max_read_buffer_size +{"a":{"b":{"c":"1","d":["0","1"]}},"b":"2020-01-01","c":"42","d":{"e":{"f":["s1","s2"]}}} +{"a":{"b":{"c":"2","d":["2","3"]}},"b":["1","2","3"],"d":{"e":{"g":"43"}}} +{"a":{"b":{"c":"3","d":["4","5"]}},"e":"Hello, World!"} +{"a":{"b":{"c":"4","d":["6","7"]}},"c":"43"} +{"a":{"b":{"c":"5","d":["8","9"]}},"b":["b1","b2"],"d":{"e":{"f":["s3","s4"],"g":"44","h":"2020-02-02 10:00:00"}}} +{"a":{"b":{"c":"1","d":["0","1"]}},"b":"2020-01-01","c":"42","d":{"e":{"f":["s1","s2"]}}} +{"a":{"b":{"c":"2","d":["2","3"]}},"b":["1","2","3"],"d":{"e":{"g":"43"}}} +{"a":{"b":{"c":"3","d":["4","5"]}},"e":"Hello, World!"} +{"a":{"b":{"c":"4","d":["6","7"]}},"c":"43"} +{"a":{"b":{"c":"5","d":["8","9"]}},"b":["b1","b2"],"d":{"e":{"f":["s3","s4"],"g":"44","h":"2020-02-02 10:00:00"}}} +{"a":{"b":{"c":"1","d":["0","1"]}},"b":"2020-01-01","c":"42","d":{"e":{"f":["s1","s2"]}}} +{"a":{"b":{"c":"2","d":["2","3"]}},"b":["1","2","3"],"d":{"e":{"g":"43"}}} +{"a":{"b":{"c":"3","d":["4","5"]}},"e":"Hello, World!"} +{"a":{"b":{"c":"4","d":["6","7"]}},"c":"43"} +{"a":{"b":{"c":"5","d":["8","9"]}},"b":["b1","b2"],"d":{"e":{"f":["s3","s4"],"g":"44","h":"2020-02-02 10:00:00"}}} +{"a":{"b":{"c":"1","d":["0","1"]}},"b":"2020-01-01","c":"42","d":{"e":{"f":["s1","s2"]}}} +{"a":{"b":{"c":"2","d":["2","3"]}},"b":["1","2","3"],"d":{"e":{"g":"43"}}} +{"a":{"b":{"c":"3","d":["4","5"]}},"e":"Hello, World!"} +{"a":{"b":{"c":"4","d":["6","7"]}},"c":"43"} +{"a":{"b":{"c":"5","d":["8","9"]}},"b":["b1","b2"],"d":{"e":{"f":["s3","s4"],"g":"44","h":"2020-02-02 10:00:00"}}} +Test PrettyJSONEachRow +{ + "json": { + "a" : { + "b" : { + "c" : "1", + "d" : [ + "0", + "1" + ] + } + }, + "b" : "2020-01-01", + "c" : "42", + "d" : { + "e" : { + "f" : [ + "s1", + "s2" + ] + } + } + } +} +{ + "json": { + "a" : { + "b" : { + "c" : "2", + "d" : [ + "2", + "3" + ] + } + }, + "b" : [ + "1", + "2", + "3" + ], + "d" : { + "e" : { + "g" : "43" + } + } + } +} +{ + "json": { + "a" : { + "b" : { + "c" : "3", + "d" : [ + "4", + "5" + ] + } + }, + "e" : "Hello, World!" + } +} +{ + "json": { + "a" : { + "b" : { + "c" : "4", + "d" : [ + "6", + "7" + ] + } + }, + "c" : "43" + } +} +{ + "json": { + "a" : { + "b" : { + "c" : "5", + "d" : [ + "8", + "9" + ] + } + }, + "b" : [ + "b1", + "b2" + ], + "d" : { + "e" : { + "f" : [ + "s3", + "s4" + ], + "g" : "44", + "h" : "2020-02-02 10:00:00" + } + } + } +} +Test TSV +{"a":{"b":{"c":"1","d":["0","1"]}},"b":"2020-01-01","c":"42","d":{"e":{"f":["s1","s2"]}}} +{"a":{"b":{"c":"2","d":["2","3"]}},"b":["1","2","3"],"d":{"e":{"g":"43"}}} +{"a":{"b":{"c":"3","d":["4","5"]}},"e":"Hello, World!"} +{"a":{"b":{"c":"4","d":["6","7"]}},"c":"43"} +{"a":{"b":{"c":"5","d":["8","9"]}},"b":["b1","b2"],"d":{"e":{"f":["s3","s4"],"g":"44","h":"2020-02-02 10:00:00"}}} +Test CSV +"{""a"":{""b"":{""c"":""1"",""d"":[""0"",""1""]}},""b"":""2020-01-01"",""c"":""42"",""d"":{""e"":{""f"":[""s1"",""s2""]}}}" +"{""a"":{""b"":{""c"":""2"",""d"":[""2"",""3""]}},""b"":[""1"",""2"",""3""],""d"":{""e"":{""g"":""43""}}}" +"{""a"":{""b"":{""c"":""3"",""d"":[""4"",""5""]}},""e"":""Hello, World!""}" +"{""a"":{""b"":{""c"":""4"",""d"":[""6"",""7""]}},""c"":""43""}" +"{""a"":{""b"":{""c"":""5"",""d"":[""8"",""9""]}},""b"":[""b1"",""b2""],""d"":{""e"":{""f"":[""s3"",""s4""],""g"":""44"",""h"":""2020-02-02 10:00:00""}}}" diff --git a/tests/queries/0_stateless/03206_json_parsing_and_formatting.sh b/tests/queries/0_stateless/03206_json_parsing_and_formatting.sh new file mode 100755 index 00000000000..7e53e4388ec --- /dev/null +++ b/tests/queries/0_stateless/03206_json_parsing_and_formatting.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +DATA_FILE=$CLICKHOUSE_TEST_UNIQUE_NAME.json + +echo '{"a" : {"b" : {"c" : 1, "d" : [0, 1]}}, "b" : "2020-01-01", "c" : 42, "d" : {"e" : {"f" : ["s1", "s2"]}}} +{"a" : {"b" : {"c" : 2, "d" : [2, 3]}}, "b" : [1, 2, 3], "c" : null, "d" : {"e" : {"g" : 43}}} +{"a" : {"b" : {"c" : 3, "d" : [4, 5]}}, "e" : "Hello, World!"} +{"a" : {"b" : {"c" : 4, "d" : [6, 7]}}, "c" : 43} +{"a" : {"b" : {"c" : 5, "d" : [8, 9]}}, "b" : ["b1", "b2"], "d" : {"e" : {"f" : ["s3", "s4"], "g" : 44, "h" : "2020-02-02 10:00:00"}}}' > $DATA_FILE + +echo "JSON with no arguments" +$CLICKHOUSE_LOCAL --allow_experimental_json_type=1 -q "select json from file($DATA_FILE, JSONAsObject)" +$CLICKHOUSE_LOCAL --allow_experimental_json_type=1 -q "select JSONAllPathsWithTypes(json), JSONDynamicPathsWithTypes(json), JSONSharedDataPathsWithTypes(json) from file($DATA_FILE, JSONAsObject)" +$CLICKHOUSE_LOCAL --allow_experimental_json_type=1 -q "select json.a.b.c, json.b, json.^d from file($DATA_FILE, JSONAsObject)" + +echo "JSON(a.b Tuple(c UInt32, d Array(Bool)), SKIP d.e, SKIP c, SKIP REGEXP '.*h.*')" +$CLICKHOUSE_LOCAL --allow_experimental_json_type=1 -q "select json from file($DATA_FILE, JSONAsObject, 'json JSON(a.b Tuple(c UInt32, d Array(Bool)), SKIP d.e, SKIP c, SKIP REGEXP \'.*h.*\')')" +$CLICKHOUSE_LOCAL --allow_experimental_json_type=1 -q "select JSONAllPathsWithTypes(json), JSONDynamicPathsWithTypes(json), JSONSharedDataPathsWithTypes(json) from file($DATA_FILE, JSONAsObject, 'json JSON(a.b Tuple(c UInt32, d Array(Bool)), SKIP d.e, SKIP c, SKIP REGEXP \'.*h.*\')')" + +echo "JSON(a.b.c UInt32, max_dynamic_paths=2)" +$CLICKHOUSE_LOCAL --allow_experimental_json_type=1 -q "select json from file($DATA_FILE, JSONAsObject, 'json JSON(a.b.c UInt32, max_dynamic_paths=2)')" +$CLICKHOUSE_LOCAL --allow_experimental_json_type=1 -q "select JSONAllPathsWithTypes(json), JSONDynamicPathsWithTypes(json), JSONSharedDataPathsWithTypes(json) from file($DATA_FILE, JSONAsObject, 'json JSON(a.b.c UInt32, max_dynamic_paths=2)')" + +echo "JSON(a.b.c UInt32, max_dynamic_paths=0)" +$CLICKHOUSE_LOCAL --allow_experimental_json_type=1 -q "select json from file($DATA_FILE, JSONAsObject, 'json JSON(a.b.c UInt32, max_dynamic_paths=0)')" +$CLICKHOUSE_LOCAL --allow_experimental_json_type=1 -q "select JSONAllPathsWithTypes(json), JSONDynamicPathsWithTypes(json), JSONSharedDataPathsWithTypes(json) from file($DATA_FILE, JSONAsObject, 'json JSON(a.b.c UInt32, max_dynamic_paths=0)')" + +echo "JSON(a.b.c UInt32, max_dynamic_types=1)" +$CLICKHOUSE_LOCAL --allow_experimental_json_type=1 -q "select json from file($DATA_FILE, JSONAsObject, 'json JSON(a.b.c UInt32, max_dynamic_types=0)')" +$CLICKHOUSE_LOCAL --allow_experimental_json_type=1 -q "select JSONAllPathsWithTypes(json), JSONDynamicPathsWithTypes(json), JSONSharedDataPathsWithTypes(json) from file($DATA_FILE, JSONAsObject, 'json JSON(a.b.c UInt32, max_dynamic_types=0)')" + +echo "Test small max_read_buffer_size" +$CLICKHOUSE_LOCAL --allow_experimental_json_type=1 --max_read_buffer_size=1 -q "select json from file($DATA_FILE, JSONAsObject)" +$CLICKHOUSE_LOCAL --allow_experimental_json_type=1 --max_read_buffer_size=2 -q "select json from file($DATA_FILE, JSONAsObject)" +$CLICKHOUSE_LOCAL --allow_experimental_json_type=1 --max_read_buffer_size=3 -q "select json from file($DATA_FILE, JSONAsObject)" +$CLICKHOUSE_LOCAL --allow_experimental_json_type=1 --max_read_buffer_size=4 -q "select json from file($DATA_FILE, JSONAsObject)" + +echo "Test PrettyJSONEachRow" +$CLICKHOUSE_LOCAL --allow_experimental_json_type=1 -q "select json from file($DATA_FILE, JSONAsObject) format PrettyJSONEachRow" + +echo "Test TSV" +$CLICKHOUSE_LOCAL --allow_experimental_json_type=1 -q "select json from file($DATA_FILE, TSV, 'json JSON') format TSV" +echo "Test CSV" +echo '"{""a"" : {""b"" : {""c"" : 1, ""d"" : [0, 1]}}, ""b"" : ""2020-01-01"", ""c"" : 42, ""d"" : {""e"" : {""f"" : [""s1"", ""s2""]}}}" +"{""a"" : {""b"" : {""c"" : 2, ""d"" : [2, 3]}}, ""b"" : [1, 2, 3], ""c"" : null, ""d"" : {""e"" : {""g"" : 43}}}" +"{""a"" : {""b"" : {""c"" : 3, ""d"" : [4, 5]}}, ""e"" : ""Hello, World!""}" +"{""a"" : {""b"" : {""c"" : 4, ""d"" : [6, 7]}}, ""c"" : 43}" +"{""a"" : {""b"" : {""c"" : 5, ""d"" : [8, 9]}}, ""b"" : [""b1"", ""b2""], ""d"" : {""e"" : {""f"" : [""s3"", ""s4""], ""g"" : 44, ""h"" : ""2020-02-02 10:00:00""}}}"' > $DATA_FILE +$CLICKHOUSE_LOCAL --allow_experimental_json_type=1 -q "select json from file($DATA_FILE, CSV, 'json JSON') format CSV" + +rm $DATA_FILE diff --git a/tests/queries/0_stateless/03206_replication_lag_metric.reference b/tests/queries/0_stateless/03206_replication_lag_metric.reference new file mode 100644 index 00000000000..02f4a7264b1 --- /dev/null +++ b/tests/queries/0_stateless/03206_replication_lag_metric.reference @@ -0,0 +1,4 @@ +0 +2 +0 +2 diff --git a/tests/queries/0_stateless/03206_replication_lag_metric.sql b/tests/queries/0_stateless/03206_replication_lag_metric.sql new file mode 100644 index 00000000000..998c332a11c --- /dev/null +++ b/tests/queries/0_stateless/03206_replication_lag_metric.sql @@ -0,0 +1,11 @@ +-- Tags: no-parallel + +CREATE DATABASE rdb1 ENGINE = Replicated('/test/test_replication_lag_metric', 'shard1', 'replica1'); +CREATE DATABASE rdb2 ENGINE = Replicated('/test/test_replication_lag_metric', 'shard1', 'replica2'); + +SET distributed_ddl_task_timeout = 0; +CREATE TABLE rdb1.t (id UInt32) ENGINE = ReplicatedMergeTree ORDER BY id; +SELECT replication_lag FROM system.clusters WHERE cluster IN ('rdb1', 'rdb2') ORDER BY cluster ASC, replica_num ASC; + +DROP DATABASE rdb1; +DROP DATABASE rdb2; diff --git a/tests/queries/0_stateless/03207_json_read_subcolumns_1_compact_merge_tree.reference.j2 b/tests/queries/0_stateless/03207_json_read_subcolumns_1_compact_merge_tree.reference.j2 new file mode 100644 index 00000000000..a93a2259442 --- /dev/null +++ b/tests/queries/0_stateless/03207_json_read_subcolumns_1_compact_merge_tree.reference.j2 @@ -0,0 +1,826 @@ +('a.b.c','UInt32') +('a.b.d','Array(Nullable(String))') +('a.b.d','DateTime') +('a.b.d','Int64') +('a.b.e','String') +('b.b._25','Int64') +('b.b._26','Int64') +('b.b._27','Int64') +('b.b._28','Int64') +('b.b._29','Int64') +('b.b.d','Int64') +('b.b.e','String') +('d.a','Array(Nullable(Int64))') +('d.a','Int64') +('d.b','Int64') +('d.c','Date') +{ + "json.non.existing.path": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.c": [0, 0, 0, 0, 0, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.e": [null, null, null, null, null, null, null, null, null, null, "str_10", "str_11", "str_12", "str_13", "str_14", null, null, null, null, null, "str_20", "str_21", "str_22", "str_23", "str_24", "str_25", "str_26", "str_27", "str_28", "str_29", "str_30", "str_31", "str_32", "str_33", "str_34", "str_35", "str_36", "str_37", "str_38", "str_39"], + "json.a.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, "str_10", "str_11", "str_12", "str_13", "str_14", null, null, null, null, null, "str_20", "str_21", "str_22", "str_23", "str_24", "str_25", "str_26", "str_27", "str_28", "str_29", "str_30", "str_31", "str_32", "str_33", "str_34", "str_35", "str_36", "str_37", "str_38", "str_39"], + "json.a.b.e.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._25": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "25", null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._25.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "25", null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._25.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._27": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "27", null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._27.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "27", null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._27.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._28": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "28", null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._28.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "28", null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._28.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._29": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "29", null, null, null, null, null, null, null, null, null, null], + "json.b.b._29.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "29", null, null, null, null, null, null, null, null, null, null], + "json.b.b._29.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.d": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "15", "16", "17", "18", "19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "15", "16", "17", "18", "19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.d.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.c": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "1970-01-31", "1970-02-01", "1970-02-02", "1970-02-03", "1970-02-04", null, null, null, null, null], + "json.d.c.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "1970-01-31", "1970-02-01", "1970-02-02", "1970-02-03", "1970-02-04", null, null, null, null, null], + "json.d.c.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.^`n`": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}], + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.^`a`.b": [{"c":0}, {"c":0}, {"c":0}, {"c":0}, {"c":0}, {"c":5}, {"c":6}, {"c":7}, {"c":8}, {"c":9}, {"c":0,"d":"10","e":"str_10"}, {"c":0,"d":"11","e":"str_11"}, {"c":0,"d":"12","e":"str_12"}, {"c":0,"d":"13","e":"str_13"}, {"c":0,"d":"14","e":"str_14"}, {"c":0}, {"c":0}, {"c":0}, {"c":0}, {"c":0}, {"c":20,"d":"20","e":"str_20"}, {"c":21,"d":"21","e":"str_21"}, {"c":22,"d":"22","e":"str_22"}, {"c":23,"d":"23","e":"str_23"}, {"c":24,"d":"24","e":"str_24"}, {"c":25,"d":"25","e":"str_25"}, {"c":26,"d":"26","e":"str_26"}, {"c":27,"d":"27","e":"str_27"}, {"c":28,"d":"28","e":"str_28"}, {"c":29,"d":"29","e":"str_29"}, {"c":30,"d":[],"e":"str_30"}, {"c":31,"d":[],"e":"str_31"}, {"c":32,"d":[],"e":"str_32"}, {"c":33,"d":[],"e":"str_33"}, {"c":34,"d":[],"e":"str_34"}, {"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}, {"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}, {"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}, {"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}, {"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}], + "json.^`b`": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {"b":{"d":"15","e":"str_15"}}, {"b":{"d":"16","e":"str_16"}}, {"b":{"d":"17","e":"str_17"}}, {"b":{"d":"18","e":"str_18"}}, {"b":{"d":"19","e":"str_19"}}, {}, {}, {}, {}, {}, {"b":{"_25":"25"}}, {"b":{"_26":"26"}}, {"b":{"_27":"27"}}, {"b":{"_28":"28"}}, {"b":{"_29":"29"}}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}], + "json.^`d`": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {"a":"30","c":"1970-01-31"}, {"a":"31","c":"1970-02-01"}, {"a":"32","c":"1970-02-02"}, {"a":"33","c":"1970-02-03"}, {"a":"34","c":"1970-02-04"}, {"a":["0"],"b":"35"}, {"a":["0","1"],"b":"36"}, {"a":["0","1","2"],"b":"37"}, {"a":["0","1","2","3"],"b":"38"}, {"a":["0","1","2","3","4"],"b":"39"}] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.non.existing.path": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.c": [0, 0, 0, 0, 0, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.e": [null, null, null, null, null, null, null, null, null, null, "str_10", "str_11", "str_12", "str_13", "str_14", null, null, null, null, null, "str_20", "str_21", "str_22", "str_23", "str_24", "str_25", "str_26", "str_27", "str_28", "str_29", "str_30", "str_31", "str_32", "str_33", "str_34", "str_35", "str_36", "str_37", "str_38", "str_39"], + "json.a.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, "str_10", "str_11", "str_12", "str_13", "str_14", null, null, null, null, null, "str_20", "str_21", "str_22", "str_23", "str_24", "str_25", "str_26", "str_27", "str_28", "str_29", "str_30", "str_31", "str_32", "str_33", "str_34", "str_35", "str_36", "str_37", "str_38", "str_39"], + "json.a.b.e.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._25": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "25", null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._25.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "25", null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._25.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._27": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "27", null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._27.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "27", null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._27.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._28": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "28", null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._28.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "28", null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._28.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._29": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "29", null, null, null, null, null, null, null, null, null, null], + "json.b.b._29.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "29", null, null, null, null, null, null, null, null, null, null], + "json.b.b._29.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.d": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "15", "16", "17", "18", "19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "15", "16", "17", "18", "19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.d.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.c": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "1970-01-31", "1970-02-01", "1970-02-02", "1970-02-03", "1970-02-04", null, null, null, null, null], + "json.d.c.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "1970-01-31", "1970-02-01", "1970-02-02", "1970-02-03", "1970-02-04", null, null, null, null, null], + "json.d.c.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.^`n`": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}], + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.^`a`.b": [{"c":0}, {"c":0}, {"c":0}, {"c":0}, {"c":0}, {"c":5}, {"c":6}, {"c":7}, {"c":8}, {"c":9}, {"c":0,"d":"10","e":"str_10"}, {"c":0,"d":"11","e":"str_11"}, {"c":0,"d":"12","e":"str_12"}, {"c":0,"d":"13","e":"str_13"}, {"c":0,"d":"14","e":"str_14"}, {"c":0}, {"c":0}, {"c":0}, {"c":0}, {"c":0}, {"c":20,"d":"20","e":"str_20"}, {"c":21,"d":"21","e":"str_21"}, {"c":22,"d":"22","e":"str_22"}, {"c":23,"d":"23","e":"str_23"}, {"c":24,"d":"24","e":"str_24"}, {"c":25,"d":"25","e":"str_25"}, {"c":26,"d":"26","e":"str_26"}, {"c":27,"d":"27","e":"str_27"}, {"c":28,"d":"28","e":"str_28"}, {"c":29,"d":"29","e":"str_29"}, {"c":30,"d":[],"e":"str_30"}, {"c":31,"d":[],"e":"str_31"}, {"c":32,"d":[],"e":"str_32"}, {"c":33,"d":[],"e":"str_33"}, {"c":34,"d":[],"e":"str_34"}, {"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}, {"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}, {"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}, {"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}, {"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}], + "json.^`b`": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {"b":{"d":"15","e":"str_15"}}, {"b":{"d":"16","e":"str_16"}}, {"b":{"d":"17","e":"str_17"}}, {"b":{"d":"18","e":"str_18"}}, {"b":{"d":"19","e":"str_19"}}, {}, {}, {}, {}, {}, {"b":{"_25":"25"}}, {"b":{"_26":"26"}}, {"b":{"_27":"27"}}, {"b":{"_28":"28"}}, {"b":{"_29":"29"}}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}], + "json.^`d`": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {"a":"30","c":"1970-01-31"}, {"a":"31","c":"1970-02-01"}, {"a":"32","c":"1970-02-02"}, {"a":"33","c":"1970-02-03"}, {"a":"34","c":"1970-02-04"}, {"a":["0"],"b":"35"}, {"a":["0","1"],"b":"36"}, {"a":["0","1","2"],"b":"37"}, {"a":["0","1","2","3"],"b":"38"}, {"a":["0","1","2","3","4"],"b":"39"}] +} +{ + "json.non.existing.path": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.non.existing.path.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.non.existing.path": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.non.existing.path.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.non.existing.path": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.non.existing.path.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.non.existing.path": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.non.existing.path.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.a.b.c": [0, 0, 0, 0, 0, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.a.b.c": [0, 0, 0, 0, 0, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +} +{ + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"] +} +{ + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]] +} +{ + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"] +} +{ + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"] +} +{ + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.b.b._26": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.b.b._26": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.a.b.c": [0, 0, 0, 0, 0, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.a.b.c": [0, 0, 0, 0, 0, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +} +{ + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"] +} +{ + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +('a.b.c','UInt32') +('a.b.d','Array(Nullable(String))') +('a.b.d','DateTime') +('a.b.d','Int64') +('a.b.e','String') +('b.b._25','Int64') +('b.b._26','Int64') +('b.b._27','Int64') +('b.b._28','Int64') +('b.b._29','Int64') +('b.b.d','Int64') +('b.b.e','String') +('d.a','Array(Nullable(Int64))') +('d.a','Int64') +('d.b','Int64') +('d.c','Date') +{ + "json.non.existing.path": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.c": [0, 0, 0, 0, 0, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.e": [null, null, null, null, null, null, null, null, null, null, "str_10", "str_11", "str_12", "str_13", "str_14", null, null, null, null, null, "str_20", "str_21", "str_22", "str_23", "str_24", "str_25", "str_26", "str_27", "str_28", "str_29", "str_30", "str_31", "str_32", "str_33", "str_34", "str_35", "str_36", "str_37", "str_38", "str_39"], + "json.a.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, "str_10", "str_11", "str_12", "str_13", "str_14", null, null, null, null, null, "str_20", "str_21", "str_22", "str_23", "str_24", "str_25", "str_26", "str_27", "str_28", "str_29", "str_30", "str_31", "str_32", "str_33", "str_34", "str_35", "str_36", "str_37", "str_38", "str_39"], + "json.a.b.e.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._25": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "25", null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._25.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "25", null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._25.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._27": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "27", null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._27.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "27", null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._27.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._28": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "28", null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._28.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "28", null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._28.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._29": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "29", null, null, null, null, null, null, null, null, null, null], + "json.b.b._29.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "29", null, null, null, null, null, null, null, null, null, null], + "json.b.b._29.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.d": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "15", "16", "17", "18", "19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "15", "16", "17", "18", "19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.d.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.c": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "1970-01-31", "1970-02-01", "1970-02-02", "1970-02-03", "1970-02-04", null, null, null, null, null], + "json.d.c.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "1970-01-31", "1970-02-01", "1970-02-02", "1970-02-03", "1970-02-04", null, null, null, null, null], + "json.d.c.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.^`n`": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}], + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.^`a`.b": [{"c":0}, {"c":0}, {"c":0}, {"c":0}, {"c":0}, {"c":5}, {"c":6}, {"c":7}, {"c":8}, {"c":9}, {"c":0,"d":"10","e":"str_10"}, {"c":0,"d":"11","e":"str_11"}, {"c":0,"d":"12","e":"str_12"}, {"c":0,"d":"13","e":"str_13"}, {"c":0,"d":"14","e":"str_14"}, {"c":0}, {"c":0}, {"c":0}, {"c":0}, {"c":0}, {"c":20,"d":"20","e":"str_20"}, {"c":21,"d":"21","e":"str_21"}, {"c":22,"d":"22","e":"str_22"}, {"c":23,"d":"23","e":"str_23"}, {"c":24,"d":"24","e":"str_24"}, {"c":25,"d":"25","e":"str_25"}, {"c":26,"d":"26","e":"str_26"}, {"c":27,"d":"27","e":"str_27"}, {"c":28,"d":"28","e":"str_28"}, {"c":29,"d":"29","e":"str_29"}, {"c":30,"d":[],"e":"str_30"}, {"c":31,"d":[],"e":"str_31"}, {"c":32,"d":[],"e":"str_32"}, {"c":33,"d":[],"e":"str_33"}, {"c":34,"d":[],"e":"str_34"}, {"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}, {"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}, {"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}, {"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}, {"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}], + "json.^`b`": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {"b":{"d":"15","e":"str_15"}}, {"b":{"d":"16","e":"str_16"}}, {"b":{"d":"17","e":"str_17"}}, {"b":{"d":"18","e":"str_18"}}, {"b":{"d":"19","e":"str_19"}}, {}, {}, {}, {}, {}, {"b":{"_25":"25"}}, {"b":{"_26":"26"}}, {"b":{"_27":"27"}}, {"b":{"_28":"28"}}, {"b":{"_29":"29"}}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}], + "json.^`d`": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {"a":"30","c":"1970-01-31"}, {"a":"31","c":"1970-02-01"}, {"a":"32","c":"1970-02-02"}, {"a":"33","c":"1970-02-03"}, {"a":"34","c":"1970-02-04"}, {"a":["0"],"b":"35"}, {"a":["0","1"],"b":"36"}, {"a":["0","1","2"],"b":"37"}, {"a":["0","1","2","3"],"b":"38"}, {"a":["0","1","2","3","4"],"b":"39"}] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.non.existing.path": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.c": [0, 0, 0, 0, 0, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.e": [null, null, null, null, null, null, null, null, null, null, "str_10", "str_11", "str_12", "str_13", "str_14", null, null, null, null, null, "str_20", "str_21", "str_22", "str_23", "str_24", "str_25", "str_26", "str_27", "str_28", "str_29", "str_30", "str_31", "str_32", "str_33", "str_34", "str_35", "str_36", "str_37", "str_38", "str_39"], + "json.a.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, "str_10", "str_11", "str_12", "str_13", "str_14", null, null, null, null, null, "str_20", "str_21", "str_22", "str_23", "str_24", "str_25", "str_26", "str_27", "str_28", "str_29", "str_30", "str_31", "str_32", "str_33", "str_34", "str_35", "str_36", "str_37", "str_38", "str_39"], + "json.a.b.e.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._25": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "25", null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._25.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "25", null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._25.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._27": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "27", null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._27.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "27", null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._27.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._28": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "28", null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._28.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "28", null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._28.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._29": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "29", null, null, null, null, null, null, null, null, null, null], + "json.b.b._29.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "29", null, null, null, null, null, null, null, null, null, null], + "json.b.b._29.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.d": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "15", "16", "17", "18", "19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "15", "16", "17", "18", "19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.d.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.c": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "1970-01-31", "1970-02-01", "1970-02-02", "1970-02-03", "1970-02-04", null, null, null, null, null], + "json.d.c.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "1970-01-31", "1970-02-01", "1970-02-02", "1970-02-03", "1970-02-04", null, null, null, null, null], + "json.d.c.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.^`n`": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}], + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.^`a`.b": [{"c":0}, {"c":0}, {"c":0}, {"c":0}, {"c":0}, {"c":5}, {"c":6}, {"c":7}, {"c":8}, {"c":9}, {"c":0,"d":"10","e":"str_10"}, {"c":0,"d":"11","e":"str_11"}, {"c":0,"d":"12","e":"str_12"}, {"c":0,"d":"13","e":"str_13"}, {"c":0,"d":"14","e":"str_14"}, {"c":0}, {"c":0}, {"c":0}, {"c":0}, {"c":0}, {"c":20,"d":"20","e":"str_20"}, {"c":21,"d":"21","e":"str_21"}, {"c":22,"d":"22","e":"str_22"}, {"c":23,"d":"23","e":"str_23"}, {"c":24,"d":"24","e":"str_24"}, {"c":25,"d":"25","e":"str_25"}, {"c":26,"d":"26","e":"str_26"}, {"c":27,"d":"27","e":"str_27"}, {"c":28,"d":"28","e":"str_28"}, {"c":29,"d":"29","e":"str_29"}, {"c":30,"d":[],"e":"str_30"}, {"c":31,"d":[],"e":"str_31"}, {"c":32,"d":[],"e":"str_32"}, {"c":33,"d":[],"e":"str_33"}, {"c":34,"d":[],"e":"str_34"}, {"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}, {"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}, {"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}, {"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}, {"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}], + "json.^`b`": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {"b":{"d":"15","e":"str_15"}}, {"b":{"d":"16","e":"str_16"}}, {"b":{"d":"17","e":"str_17"}}, {"b":{"d":"18","e":"str_18"}}, {"b":{"d":"19","e":"str_19"}}, {}, {}, {}, {}, {}, {"b":{"_25":"25"}}, {"b":{"_26":"26"}}, {"b":{"_27":"27"}}, {"b":{"_28":"28"}}, {"b":{"_29":"29"}}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}], + "json.^`d`": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {"a":"30","c":"1970-01-31"}, {"a":"31","c":"1970-02-01"}, {"a":"32","c":"1970-02-02"}, {"a":"33","c":"1970-02-03"}, {"a":"34","c":"1970-02-04"}, {"a":["0"],"b":"35"}, {"a":["0","1"],"b":"36"}, {"a":["0","1","2"],"b":"37"}, {"a":["0","1","2","3"],"b":"38"}, {"a":["0","1","2","3","4"],"b":"39"}] +} +{ + "json.non.existing.path": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.non.existing.path.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.non.existing.path": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.non.existing.path.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.non.existing.path": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.non.existing.path.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.non.existing.path": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.non.existing.path.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.a.b.c": [0, 0, 0, 0, 0, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.a.b.c": [0, 0, 0, 0, 0, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +} +{ + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"] +} +{ + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]] +} +{ + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"] +} +{ + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"] +} +{ + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.b.b._26": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.b.b._26": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.a.b.c": [0, 0, 0, 0, 0, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.a.b.c": [0, 0, 0, 0, 0, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +} +{ + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"] +} +{ + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} diff --git a/tests/queries/0_stateless/03207_json_read_subcolumns_1_compact_merge_tree.sql.j2 b/tests/queries/0_stateless/03207_json_read_subcolumns_1_compact_merge_tree.sql.j2 new file mode 100644 index 00000000000..0ec1a86372b --- /dev/null +++ b/tests/queries/0_stateless/03207_json_read_subcolumns_1_compact_merge_tree.sql.j2 @@ -0,0 +1,93 @@ +-- Tags: no-fasttest + +set allow_experimental_json_type = 1; +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; +set session_timezone = 'UTC'; + +drop table if exists test; +create table test (id UInt64, json JSON(max_dynamic_paths=2, a.b.c UInt32)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000; + +insert into test select number, '{}' from numbers(5); +insert into test select number, toJSONString(map('a.b.c', number)) from numbers(5, 5); +insert into test select number, toJSONString(map('a.b.d', number::UInt32, 'a.b.e', 'str_' || toString(number))) from numbers(10, 5); +insert into test select number, toJSONString(map('b.b.d', number::UInt32, 'b.b.e', 'str_' || toString(number))) from numbers(15, 5); +insert into test select number, toJSONString(map('a.b.c', number, 'a.b.d', number::UInt32, 'a.b.e', 'str_' || toString(number))) from numbers(20, 5); +insert into test select number, toJSONString(map('a.b.c', number, 'a.b.d', number::UInt32, 'a.b.e', 'str_' || toString(number), 'b.b._' || toString(number), number::UInt32)) from numbers(25, 5); +insert into test select number, toJSONString(map('a.b.c', number, 'a.b.d', range(number % + 1)::Array(UInt32), 'a.b.e', 'str_' || toString(number), 'd.a', number::UInt32, 'd.c', toDate(number))) from numbers(30, 5); +insert into test select number, toJSONString(map('a.b.c', number, 'a.b.d', toDateTime(number), 'a.b.e', 'str_' || toString(number), 'd.a', range(number % 5 + 1)::Array(UInt32), 'd.b', number::UInt32)) from numbers(35, 5); + +{% for merge_command in ['system stop merges test', 'system start merges test'] -%} + +{{ merge_command }}; + +select distinct arrayJoin(JSONAllPathsWithTypes(json)) as paths_with_types from test order by paths_with_types; + +select json.non.existing.path, json.a.b.c, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:UUID, json.a.b.e, json.a.b.e.:String, json.a.b.e.:UUID, json.b.b.`_25`, json.b.b.`_25`.:Int64, json.b.b.`_25`.:UUID, json.b.b.`_26`, json.b.b.`_26`.:Int64, json.b.b.`_26`.:UUID, json.b.b.`_27`, json.b.b.`_27`.:Int64, json.b.b.`_27`.:UUID, json.b.b.`_28`, json.b.b.`_28`.:Int64, json.b.b.`_28`.:UUID, json.b.b.`_29`, json.b.b.`_29`.:Int64, json.b.b.`_29`.:UUID, json.b.b.d, json.b.b.d.:Int64, json.b.b.d.:UUID, json.b.b.e, json.b.b.e.:String, json.b.b.e.:UUID, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:UUID, json.d.b, json.d.b.:Int64, json.d.b.:UUID, json.d.c, json.d.c.:Date, json.d.c.:UUID, json.^n, json.^a, json.^a.b, json.^b, json.^d from test order by id format JSONColumns; +select json, json.non.existing.path, json.a.b.c, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:UUID, json.a.b.e, json.a.b.e.:String, json.a.b.e.:UUID, json.b.b.`_25`, json.b.b.`_25`.:Int64, json.b.b.`_25`.:UUID, json.b.b.`_26`, json.b.b.`_26`.:Int64, json.b.b.`_26`.:UUID, json.b.b.`_27`, json.b.b.`_27`.:Int64, json.b.b.`_27`.:UUID, json.b.b.`_28`, json.b.b.`_28`.:Int64, json.b.b.`_28`.:UUID, json.b.b.`_29`, json.b.b.`_29`.:Int64, json.b.b.`_29`.:UUID, json.b.b.d, json.b.b.d.:Int64, json.b.b.d.:UUID, json.b.b.e, json.b.b.e.:String, json.b.b.e.:UUID, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:UUID, json.d.b, json.d.b.:Int64, json.d.b.:UUID, json.d.c, json.d.c.:Date, json.d.c.:UUID, json.^n, json.^a, json.^a.b, json.^b, json.^d from test order by id format JSONColumns; + +select json.non.existing.path from test order by id format JSONColumns; +select json.non.existing.path.:Int64 from test order by id format JSONColumns; +select json.non.existing.path, json.non.existing.path.:Int64 from test order by id format JSONColumns; +select json, json.non.existing.path from test order by id format JSONColumns; +select json, json.non.existing.path.:Int64 from test order by id format JSONColumns; +select json, json.non.existing.path, json.non.existing.path.:Int64 from test order by id format JSONColumns; + +select json.a.b.c from test order by id format JSONColumns; +select json, json.a.b.c from test order by id format JSONColumns; + +select json.b.b.e from test order by id format JSONColumns; +select json.b.b.e.:String, json.b.b.e.:Date from test order by id format JSONColumns; +select json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date from test order by id format JSONColumns; +select json, json.b.b.e from test order by id format JSONColumns; +select json, json.b.b.e.:String, json.b.b.e.:Date from test order by id format JSONColumns; +select json, json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date from test order by id format JSONColumns; + +select json.b.b.e, json.a.b.d from test order by id format JSONColumns; +select json.b.b.e.:String, json.b.b.e.:Date, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format JSONColumns; +select json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format JSONColumns; +select json, json.b.b.e, json.a.b.d from test order by id format JSONColumns; +select json, json.b.b.e.:String, json.b.b.e.:Date, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format JSONColumns; +select json, json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format JSONColumns; + +select json.b.b.e, json.d.a from test order by id format JSONColumns; +select json.b.b.e.:String, json.b.b.e.:Date, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date from test order by id format JSONColumns; +select json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date from test order by id format JSONColumns; +select json, json.b.b.e, json.d.a from test order by id format JSONColumns; +select json, json.b.b.e.:String, json.b.b.e.:Date, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date from test order by id format JSONColumns; +select json, json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date from test order by id format JSONColumns; + +select json.b.b.e, json.d.a, json.d.b from test order by id format JSONColumns; +select json.b.b.e.:String, json.b.b.e.:Date, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b.:Int64, json.d.b.:Date from test order by id format JSONColumns; +select json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b, json.d.b.:Int64, json.d.b.:Date from test order by id format JSONColumns; +select json, json.b.b.e, json.d.a, json.d.b from test order by id format JSONColumns; +select json, json.b.b.e.:String, json.b.b.e.:Date, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b.:Int64, json.d.b.:Date from test order by id format JSONColumns; +select json, json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b, json.d.b.:Int64, json.d.b.:Date from test order by id format JSONColumns; + +select json.d.a, json.d.b from test order by id format JSONColumns; +select json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b.:Int64, json.d.b.:Date from test order by id format JSONColumns; +select json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b, json.d.b.:Int64, json.d.b.:Date from test order by id format JSONColumns; +select json, json.d.a, json.d.b from test order by id format JSONColumns; +select json, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b.:Int64, json.d.b.:Date from test order by id format JSONColumns; +select json, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b, json.d.b.:Int64, json.d.b.:Date from test order by id format JSONColumns; + +select json.d.a, json.b.b.`_26` from test order by id format JSONColumns; +select json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.b.b.`_26`.:Int64, json.b.b.`_26`.:Date from test order by id format JSONColumns; +select json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.b.b.`_26`.:Int64, json.b.b, json.b.b.`_26`.:Date from test order by id format JSONColumns; +select json, json.d.a, json.b.b.`_26` from test order by id format JSONColumns; +select json, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.b.b.`_26`.:Int64, json.b.b.`_26`.:Date from test order by id format JSONColumns; +select json, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.b.b.`_26`.:Int64, json.b.b, json.b.b.`_26`.:Date from test order by id format JSONColumns; + +select json.^a, json.a.b.c from test order by id format JSONColumns; +select json, json.^a, json.a.b.c from test order by id format JSONColumns; + +select json.^a, json.a.b.d from test order by id format JSONColumns; +select json.^a, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format JSONColumns; +select json.^a, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format JSONColumns; +select json, json.^a, json.a.b.d from test order by id format JSONColumns; +select json, json.^a, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format JSONColumns; +select json, json.^a, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format JSONColumns; + +{% endfor -%} + +drop table test; \ No newline at end of file diff --git a/tests/queries/0_stateless/03207_json_read_subcolumns_1_memory.reference b/tests/queries/0_stateless/03207_json_read_subcolumns_1_memory.reference new file mode 100644 index 00000000000..6276be52c0d --- /dev/null +++ b/tests/queries/0_stateless/03207_json_read_subcolumns_1_memory.reference @@ -0,0 +1,413 @@ +('a.b.c','UInt32') +('a.b.d','Array(Nullable(String))') +('a.b.d','DateTime') +('a.b.d','Int64') +('a.b.e','String') +('b.b._25','Int64') +('b.b._26','Int64') +('b.b._27','Int64') +('b.b._28','Int64') +('b.b._29','Int64') +('b.b.d','Int64') +('b.b.e','String') +('d.a','Array(Nullable(Int64))') +('d.a','Int64') +('d.b','Int64') +('d.c','Date') +{ + "json.non.existing.path": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.c": [0, 0, 0, 0, 0, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.e": [null, null, null, null, null, null, null, null, null, null, "str_10", "str_11", "str_12", "str_13", "str_14", null, null, null, null, null, "str_20", "str_21", "str_22", "str_23", "str_24", "str_25", "str_26", "str_27", "str_28", "str_29", "str_30", "str_31", "str_32", "str_33", "str_34", "str_35", "str_36", "str_37", "str_38", "str_39"], + "json.a.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, "str_10", "str_11", "str_12", "str_13", "str_14", null, null, null, null, null, "str_20", "str_21", "str_22", "str_23", "str_24", "str_25", "str_26", "str_27", "str_28", "str_29", "str_30", "str_31", "str_32", "str_33", "str_34", "str_35", "str_36", "str_37", "str_38", "str_39"], + "json.a.b.e.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._25": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "25", null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._25.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "25", null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._25.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._27": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "27", null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._27.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "27", null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._27.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._28": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "28", null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._28.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "28", null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._28.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._29": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "29", null, null, null, null, null, null, null, null, null, null], + "json.b.b._29.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "29", null, null, null, null, null, null, null, null, null, null], + "json.b.b._29.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.d": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "15", "16", "17", "18", "19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "15", "16", "17", "18", "19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.d.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.c": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "1970-01-31", "1970-02-01", "1970-02-02", "1970-02-03", "1970-02-04", null, null, null, null, null], + "json.d.c.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "1970-01-31", "1970-02-01", "1970-02-02", "1970-02-03", "1970-02-04", null, null, null, null, null], + "json.d.c.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.^`n`": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}], + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.^`a`.b": [{"c":0}, {"c":0}, {"c":0}, {"c":0}, {"c":0}, {"c":5}, {"c":6}, {"c":7}, {"c":8}, {"c":9}, {"c":0,"d":"10","e":"str_10"}, {"c":0,"d":"11","e":"str_11"}, {"c":0,"d":"12","e":"str_12"}, {"c":0,"d":"13","e":"str_13"}, {"c":0,"d":"14","e":"str_14"}, {"c":0}, {"c":0}, {"c":0}, {"c":0}, {"c":0}, {"c":20,"d":"20","e":"str_20"}, {"c":21,"d":"21","e":"str_21"}, {"c":22,"d":"22","e":"str_22"}, {"c":23,"d":"23","e":"str_23"}, {"c":24,"d":"24","e":"str_24"}, {"c":25,"d":"25","e":"str_25"}, {"c":26,"d":"26","e":"str_26"}, {"c":27,"d":"27","e":"str_27"}, {"c":28,"d":"28","e":"str_28"}, {"c":29,"d":"29","e":"str_29"}, {"c":30,"d":[],"e":"str_30"}, {"c":31,"d":[],"e":"str_31"}, {"c":32,"d":[],"e":"str_32"}, {"c":33,"d":[],"e":"str_33"}, {"c":34,"d":[],"e":"str_34"}, {"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}, {"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}, {"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}, {"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}, {"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}], + "json.^`b`": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {"b":{"d":"15","e":"str_15"}}, {"b":{"d":"16","e":"str_16"}}, {"b":{"d":"17","e":"str_17"}}, {"b":{"d":"18","e":"str_18"}}, {"b":{"d":"19","e":"str_19"}}, {}, {}, {}, {}, {}, {"b":{"_25":"25"}}, {"b":{"_26":"26"}}, {"b":{"_27":"27"}}, {"b":{"_28":"28"}}, {"b":{"_29":"29"}}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}], + "json.^`d`": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {"a":"30","c":"1970-01-31"}, {"a":"31","c":"1970-02-01"}, {"a":"32","c":"1970-02-02"}, {"a":"33","c":"1970-02-03"}, {"a":"34","c":"1970-02-04"}, {"a":["0"],"b":"35"}, {"a":["0","1"],"b":"36"}, {"a":["0","1","2"],"b":"37"}, {"a":["0","1","2","3"],"b":"38"}, {"a":["0","1","2","3","4"],"b":"39"}] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.non.existing.path": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.c": [0, 0, 0, 0, 0, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.e": [null, null, null, null, null, null, null, null, null, null, "str_10", "str_11", "str_12", "str_13", "str_14", null, null, null, null, null, "str_20", "str_21", "str_22", "str_23", "str_24", "str_25", "str_26", "str_27", "str_28", "str_29", "str_30", "str_31", "str_32", "str_33", "str_34", "str_35", "str_36", "str_37", "str_38", "str_39"], + "json.a.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, "str_10", "str_11", "str_12", "str_13", "str_14", null, null, null, null, null, "str_20", "str_21", "str_22", "str_23", "str_24", "str_25", "str_26", "str_27", "str_28", "str_29", "str_30", "str_31", "str_32", "str_33", "str_34", "str_35", "str_36", "str_37", "str_38", "str_39"], + "json.a.b.e.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._25": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "25", null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._25.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "25", null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._25.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._27": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "27", null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._27.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "27", null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._27.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._28": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "28", null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._28.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "28", null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._28.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._29": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "29", null, null, null, null, null, null, null, null, null, null], + "json.b.b._29.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "29", null, null, null, null, null, null, null, null, null, null], + "json.b.b._29.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.d": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "15", "16", "17", "18", "19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "15", "16", "17", "18", "19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.d.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.c": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "1970-01-31", "1970-02-01", "1970-02-02", "1970-02-03", "1970-02-04", null, null, null, null, null], + "json.d.c.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "1970-01-31", "1970-02-01", "1970-02-02", "1970-02-03", "1970-02-04", null, null, null, null, null], + "json.d.c.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.^`n`": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}], + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.^`a`.b": [{"c":0}, {"c":0}, {"c":0}, {"c":0}, {"c":0}, {"c":5}, {"c":6}, {"c":7}, {"c":8}, {"c":9}, {"c":0,"d":"10","e":"str_10"}, {"c":0,"d":"11","e":"str_11"}, {"c":0,"d":"12","e":"str_12"}, {"c":0,"d":"13","e":"str_13"}, {"c":0,"d":"14","e":"str_14"}, {"c":0}, {"c":0}, {"c":0}, {"c":0}, {"c":0}, {"c":20,"d":"20","e":"str_20"}, {"c":21,"d":"21","e":"str_21"}, {"c":22,"d":"22","e":"str_22"}, {"c":23,"d":"23","e":"str_23"}, {"c":24,"d":"24","e":"str_24"}, {"c":25,"d":"25","e":"str_25"}, {"c":26,"d":"26","e":"str_26"}, {"c":27,"d":"27","e":"str_27"}, {"c":28,"d":"28","e":"str_28"}, {"c":29,"d":"29","e":"str_29"}, {"c":30,"d":[],"e":"str_30"}, {"c":31,"d":[],"e":"str_31"}, {"c":32,"d":[],"e":"str_32"}, {"c":33,"d":[],"e":"str_33"}, {"c":34,"d":[],"e":"str_34"}, {"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}, {"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}, {"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}, {"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}, {"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}], + "json.^`b`": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {"b":{"d":"15","e":"str_15"}}, {"b":{"d":"16","e":"str_16"}}, {"b":{"d":"17","e":"str_17"}}, {"b":{"d":"18","e":"str_18"}}, {"b":{"d":"19","e":"str_19"}}, {}, {}, {}, {}, {}, {"b":{"_25":"25"}}, {"b":{"_26":"26"}}, {"b":{"_27":"27"}}, {"b":{"_28":"28"}}, {"b":{"_29":"29"}}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}], + "json.^`d`": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {"a":"30","c":"1970-01-31"}, {"a":"31","c":"1970-02-01"}, {"a":"32","c":"1970-02-02"}, {"a":"33","c":"1970-02-03"}, {"a":"34","c":"1970-02-04"}, {"a":["0"],"b":"35"}, {"a":["0","1"],"b":"36"}, {"a":["0","1","2"],"b":"37"}, {"a":["0","1","2","3"],"b":"38"}, {"a":["0","1","2","3","4"],"b":"39"}] +} +{ + "json.non.existing.path": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.non.existing.path.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.non.existing.path": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.non.existing.path.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.non.existing.path": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.non.existing.path.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.non.existing.path": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.non.existing.path.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.a.b.c": [0, 0, 0, 0, 0, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.a.b.c": [0, 0, 0, 0, 0, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +} +{ + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"] +} +{ + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]] +} +{ + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"] +} +{ + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"] +} +{ + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.b.b._26": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.b.b._26": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.a.b.c": [0, 0, 0, 0, 0, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.a.b.c": [0, 0, 0, 0, 0, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +} +{ + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"] +} +{ + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} diff --git a/tests/queries/0_stateless/03207_json_read_subcolumns_1_memory.sql b/tests/queries/0_stateless/03207_json_read_subcolumns_1_memory.sql new file mode 100644 index 00000000000..51e6970759d --- /dev/null +++ b/tests/queries/0_stateless/03207_json_read_subcolumns_1_memory.sql @@ -0,0 +1,87 @@ +-- Tags: no-fasttest, long +set allow_experimental_json_type = 1; +set allow_experimental_variant_type = 1; +set use_variant_as_common_type=1; +set session_timezone = 'UTC'; + +drop table if exists test; +create table test (id UInt64, json JSON(max_dynamic_paths=2, a.b.c UInt32)) engine=Memory; + +truncate table test; +insert into test select number, '{}' from numbers(5); +insert into test select number, toJSONString(map('a.b.c', number)) from numbers(5, 5); +insert into test select number, toJSONString(map('a.b.d', number::UInt32, 'a.b.e', 'str_' || toString(number))) from numbers(10, 5); +insert into test select number, toJSONString(map('b.b.d', number::UInt32, 'b.b.e', 'str_' || toString(number))) from numbers(15, 5); +insert into test select number, toJSONString(map('a.b.c', number, 'a.b.d', number::UInt32, 'a.b.e', 'str_' || toString(number))) from numbers(20, 5); +insert into test select number, toJSONString(map('a.b.c', number, 'a.b.d', number::UInt32, 'a.b.e', 'str_' || toString(number), 'b.b._' || toString(number), number::UInt32)) from numbers(25, 5); +insert into test select number, toJSONString(map('a.b.c', number, 'a.b.d', range(number % + 1)::Array(UInt32), 'a.b.e', 'str_' || toString(number), 'd.a', number::UInt32, 'd.c', toDate(number))) from numbers(30, 5); +insert into test select number, toJSONString(map('a.b.c', number, 'a.b.d', toDateTime(number), 'a.b.e', 'str_' || toString(number), 'd.a', range(number % 5 + 1)::Array(UInt32), 'd.b', number::UInt32)) from numbers(35, 5); + +select distinct arrayJoin(JSONAllPathsWithTypes(json)) as paths_with_types from test order by paths_with_types; + +select json.non.existing.path, json.a.b.c, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:UUID, json.a.b.e, json.a.b.e.:String, json.a.b.e.:UUID, json.b.b.`_25`, json.b.b.`_25`.:Int64, json.b.b.`_25`.:UUID, json.b.b.`_26`, json.b.b.`_26`.:Int64, json.b.b.`_26`.:UUID, json.b.b.`_27`, json.b.b.`_27`.:Int64, json.b.b.`_27`.:UUID, json.b.b.`_28`, json.b.b.`_28`.:Int64, json.b.b.`_28`.:UUID, json.b.b.`_29`, json.b.b.`_29`.:Int64, json.b.b.`_29`.:UUID, json.b.b.d, json.b.b.d.:Int64, json.b.b.d.:UUID, json.b.b.e, json.b.b.e.:String, json.b.b.e.:UUID, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:UUID, json.d.b, json.d.b.:Int64, json.d.b.:UUID, json.d.c, json.d.c.:Date, json.d.c.:UUID, json.^n, json.^a, json.^a.b, json.^b, json.^d from test order by id format JSONColumns; +select json, json.non.existing.path, json.a.b.c, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:UUID, json.a.b.e, json.a.b.e.:String, json.a.b.e.:UUID, json.b.b.`_25`, json.b.b.`_25`.:Int64, json.b.b.`_25`.:UUID, json.b.b.`_26`, json.b.b.`_26`.:Int64, json.b.b.`_26`.:UUID, json.b.b.`_27`, json.b.b.`_27`.:Int64, json.b.b.`_27`.:UUID, json.b.b.`_28`, json.b.b.`_28`.:Int64, json.b.b.`_28`.:UUID, json.b.b.`_29`, json.b.b.`_29`.:Int64, json.b.b.`_29`.:UUID, json.b.b.d, json.b.b.d.:Int64, json.b.b.d.:UUID, json.b.b.e, json.b.b.e.:String, json.b.b.e.:UUID, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:UUID, json.d.b, json.d.b.:Int64, json.d.b.:UUID, json.d.c, json.d.c.:Date, json.d.c.:UUID, json.^n, json.^a, json.^a.b, json.^b, json.^d from test order by id format JSONColumns; + +select json.non.existing.path from test order by id format JSONColumns; +select json.non.existing.path.:Int64 from test order by id format JSONColumns; +select json.non.existing.path, json.non.existing.path.:Int64 from test order by id format JSONColumns; +select json, json.non.existing.path from test order by id format JSONColumns; +select json, json.non.existing.path.:Int64 from test order by id format JSONColumns; +select json, json.non.existing.path, json.non.existing.path.:Int64 from test order by id format JSONColumns; + +select json.a.b.c from test order by id format JSONColumns; +select json, json.a.b.c from test order by id format JSONColumns; + +select json.b.b.e from test order by id format JSONColumns; +select json.b.b.e.:String, json.b.b.e.:Date from test order by id format JSONColumns; +select json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date from test order by id format JSONColumns; +select json, json.b.b.e from test order by id format JSONColumns; +select json, json.b.b.e.:String, json.b.b.e.:Date from test order by id format JSONColumns; +select json, json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date from test order by id format JSONColumns; + +select json.b.b.e, json.a.b.d from test order by id format JSONColumns; +select json.b.b.e.:String, json.b.b.e.:Date, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format JSONColumns; +select json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format JSONColumns; +select json, json.b.b.e, json.a.b.d from test order by id format JSONColumns; +select json, json.b.b.e.:String, json.b.b.e.:Date, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format JSONColumns; +select json, json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format JSONColumns; + +select json.b.b.e, json.d.a from test order by id format JSONColumns; +select json.b.b.e.:String, json.b.b.e.:Date, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date from test order by id format JSONColumns; +select json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date from test order by id format JSONColumns; +select json, json.b.b.e, json.d.a from test order by id format JSONColumns; +select json, json.b.b.e.:String, json.b.b.e.:Date, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date from test order by id format JSONColumns; +select json, json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date from test order by id format JSONColumns; + +select json.b.b.e, json.d.a, json.d.b from test order by id format JSONColumns; +select json.b.b.e.:String, json.b.b.e.:Date, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b.:Int64, json.d.b.:Date from test order by id format JSONColumns; +select json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b, json.d.b.:Int64, json.d.b.:Date from test order by id format JSONColumns; +select json, json.b.b.e, json.d.a, json.d.b from test order by id format JSONColumns; +select json, json.b.b.e.:String, json.b.b.e.:Date, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b.:Int64, json.d.b.:Date from test order by id format JSONColumns; +select json, json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b, json.d.b.:Int64, json.d.b.:Date from test order by id format JSONColumns; + +select json.d.a, json.d.b from test order by id format JSONColumns; +select json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b.:Int64, json.d.b.:Date from test order by id format JSONColumns; +select json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b, json.d.b.:Int64, json.d.b.:Date from test order by id format JSONColumns; +select json, json.d.a, json.d.b from test order by id format JSONColumns; +select json, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b.:Int64, json.d.b.:Date from test order by id format JSONColumns; +select json, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b, json.d.b.:Int64, json.d.b.:Date from test order by id format JSONColumns; + +select json.d.a, json.b.b.`_26` from test order by id format JSONColumns; +select json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.b.b.`_26`.:Int64, json.b.b.`_26`.:Date from test order by id format JSONColumns; +select json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.b.b.`_26`.:Int64, json.b.b, json.b.b.`_26`.:Date from test order by id format JSONColumns; +select json, json.d.a, json.b.b.`_26` from test order by id format JSONColumns; +select json, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.b.b.`_26`.:Int64, json.b.b.`_26`.:Date from test order by id format JSONColumns; +select json, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.b.b.`_26`.:Int64, json.b.b, json.b.b.`_26`.:Date from test order by id format JSONColumns; + +select json.^a, json.a.b.c from test order by id format JSONColumns; +select json, json.^a, json.a.b.c from test order by id format JSONColumns; + +select json.^a, json.a.b.d from test order by id format JSONColumns; +select json.^a, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format JSONColumns; +select json.^a, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format JSONColumns; +select json, json.^a, json.a.b.d from test order by id format JSONColumns; +select json, json.^a, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format JSONColumns; +select json, json.^a, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format JSONColumns; + +drop table test; diff --git a/tests/queries/0_stateless/03207_json_read_subcolumns_1_wide_merge_tree.reference.j2 b/tests/queries/0_stateless/03207_json_read_subcolumns_1_wide_merge_tree.reference.j2 new file mode 100644 index 00000000000..a93a2259442 --- /dev/null +++ b/tests/queries/0_stateless/03207_json_read_subcolumns_1_wide_merge_tree.reference.j2 @@ -0,0 +1,826 @@ +('a.b.c','UInt32') +('a.b.d','Array(Nullable(String))') +('a.b.d','DateTime') +('a.b.d','Int64') +('a.b.e','String') +('b.b._25','Int64') +('b.b._26','Int64') +('b.b._27','Int64') +('b.b._28','Int64') +('b.b._29','Int64') +('b.b.d','Int64') +('b.b.e','String') +('d.a','Array(Nullable(Int64))') +('d.a','Int64') +('d.b','Int64') +('d.c','Date') +{ + "json.non.existing.path": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.c": [0, 0, 0, 0, 0, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.e": [null, null, null, null, null, null, null, null, null, null, "str_10", "str_11", "str_12", "str_13", "str_14", null, null, null, null, null, "str_20", "str_21", "str_22", "str_23", "str_24", "str_25", "str_26", "str_27", "str_28", "str_29", "str_30", "str_31", "str_32", "str_33", "str_34", "str_35", "str_36", "str_37", "str_38", "str_39"], + "json.a.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, "str_10", "str_11", "str_12", "str_13", "str_14", null, null, null, null, null, "str_20", "str_21", "str_22", "str_23", "str_24", "str_25", "str_26", "str_27", "str_28", "str_29", "str_30", "str_31", "str_32", "str_33", "str_34", "str_35", "str_36", "str_37", "str_38", "str_39"], + "json.a.b.e.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._25": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "25", null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._25.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "25", null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._25.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._27": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "27", null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._27.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "27", null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._27.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._28": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "28", null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._28.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "28", null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._28.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._29": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "29", null, null, null, null, null, null, null, null, null, null], + "json.b.b._29.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "29", null, null, null, null, null, null, null, null, null, null], + "json.b.b._29.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.d": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "15", "16", "17", "18", "19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "15", "16", "17", "18", "19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.d.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.c": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "1970-01-31", "1970-02-01", "1970-02-02", "1970-02-03", "1970-02-04", null, null, null, null, null], + "json.d.c.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "1970-01-31", "1970-02-01", "1970-02-02", "1970-02-03", "1970-02-04", null, null, null, null, null], + "json.d.c.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.^`n`": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}], + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.^`a`.b": [{"c":0}, {"c":0}, {"c":0}, {"c":0}, {"c":0}, {"c":5}, {"c":6}, {"c":7}, {"c":8}, {"c":9}, {"c":0,"d":"10","e":"str_10"}, {"c":0,"d":"11","e":"str_11"}, {"c":0,"d":"12","e":"str_12"}, {"c":0,"d":"13","e":"str_13"}, {"c":0,"d":"14","e":"str_14"}, {"c":0}, {"c":0}, {"c":0}, {"c":0}, {"c":0}, {"c":20,"d":"20","e":"str_20"}, {"c":21,"d":"21","e":"str_21"}, {"c":22,"d":"22","e":"str_22"}, {"c":23,"d":"23","e":"str_23"}, {"c":24,"d":"24","e":"str_24"}, {"c":25,"d":"25","e":"str_25"}, {"c":26,"d":"26","e":"str_26"}, {"c":27,"d":"27","e":"str_27"}, {"c":28,"d":"28","e":"str_28"}, {"c":29,"d":"29","e":"str_29"}, {"c":30,"d":[],"e":"str_30"}, {"c":31,"d":[],"e":"str_31"}, {"c":32,"d":[],"e":"str_32"}, {"c":33,"d":[],"e":"str_33"}, {"c":34,"d":[],"e":"str_34"}, {"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}, {"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}, {"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}, {"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}, {"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}], + "json.^`b`": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {"b":{"d":"15","e":"str_15"}}, {"b":{"d":"16","e":"str_16"}}, {"b":{"d":"17","e":"str_17"}}, {"b":{"d":"18","e":"str_18"}}, {"b":{"d":"19","e":"str_19"}}, {}, {}, {}, {}, {}, {"b":{"_25":"25"}}, {"b":{"_26":"26"}}, {"b":{"_27":"27"}}, {"b":{"_28":"28"}}, {"b":{"_29":"29"}}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}], + "json.^`d`": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {"a":"30","c":"1970-01-31"}, {"a":"31","c":"1970-02-01"}, {"a":"32","c":"1970-02-02"}, {"a":"33","c":"1970-02-03"}, {"a":"34","c":"1970-02-04"}, {"a":["0"],"b":"35"}, {"a":["0","1"],"b":"36"}, {"a":["0","1","2"],"b":"37"}, {"a":["0","1","2","3"],"b":"38"}, {"a":["0","1","2","3","4"],"b":"39"}] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.non.existing.path": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.c": [0, 0, 0, 0, 0, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.e": [null, null, null, null, null, null, null, null, null, null, "str_10", "str_11", "str_12", "str_13", "str_14", null, null, null, null, null, "str_20", "str_21", "str_22", "str_23", "str_24", "str_25", "str_26", "str_27", "str_28", "str_29", "str_30", "str_31", "str_32", "str_33", "str_34", "str_35", "str_36", "str_37", "str_38", "str_39"], + "json.a.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, "str_10", "str_11", "str_12", "str_13", "str_14", null, null, null, null, null, "str_20", "str_21", "str_22", "str_23", "str_24", "str_25", "str_26", "str_27", "str_28", "str_29", "str_30", "str_31", "str_32", "str_33", "str_34", "str_35", "str_36", "str_37", "str_38", "str_39"], + "json.a.b.e.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._25": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "25", null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._25.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "25", null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._25.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._27": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "27", null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._27.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "27", null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._27.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._28": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "28", null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._28.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "28", null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._28.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._29": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "29", null, null, null, null, null, null, null, null, null, null], + "json.b.b._29.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "29", null, null, null, null, null, null, null, null, null, null], + "json.b.b._29.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.d": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "15", "16", "17", "18", "19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "15", "16", "17", "18", "19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.d.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.c": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "1970-01-31", "1970-02-01", "1970-02-02", "1970-02-03", "1970-02-04", null, null, null, null, null], + "json.d.c.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "1970-01-31", "1970-02-01", "1970-02-02", "1970-02-03", "1970-02-04", null, null, null, null, null], + "json.d.c.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.^`n`": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}], + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.^`a`.b": [{"c":0}, {"c":0}, {"c":0}, {"c":0}, {"c":0}, {"c":5}, {"c":6}, {"c":7}, {"c":8}, {"c":9}, {"c":0,"d":"10","e":"str_10"}, {"c":0,"d":"11","e":"str_11"}, {"c":0,"d":"12","e":"str_12"}, {"c":0,"d":"13","e":"str_13"}, {"c":0,"d":"14","e":"str_14"}, {"c":0}, {"c":0}, {"c":0}, {"c":0}, {"c":0}, {"c":20,"d":"20","e":"str_20"}, {"c":21,"d":"21","e":"str_21"}, {"c":22,"d":"22","e":"str_22"}, {"c":23,"d":"23","e":"str_23"}, {"c":24,"d":"24","e":"str_24"}, {"c":25,"d":"25","e":"str_25"}, {"c":26,"d":"26","e":"str_26"}, {"c":27,"d":"27","e":"str_27"}, {"c":28,"d":"28","e":"str_28"}, {"c":29,"d":"29","e":"str_29"}, {"c":30,"d":[],"e":"str_30"}, {"c":31,"d":[],"e":"str_31"}, {"c":32,"d":[],"e":"str_32"}, {"c":33,"d":[],"e":"str_33"}, {"c":34,"d":[],"e":"str_34"}, {"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}, {"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}, {"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}, {"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}, {"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}], + "json.^`b`": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {"b":{"d":"15","e":"str_15"}}, {"b":{"d":"16","e":"str_16"}}, {"b":{"d":"17","e":"str_17"}}, {"b":{"d":"18","e":"str_18"}}, {"b":{"d":"19","e":"str_19"}}, {}, {}, {}, {}, {}, {"b":{"_25":"25"}}, {"b":{"_26":"26"}}, {"b":{"_27":"27"}}, {"b":{"_28":"28"}}, {"b":{"_29":"29"}}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}], + "json.^`d`": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {"a":"30","c":"1970-01-31"}, {"a":"31","c":"1970-02-01"}, {"a":"32","c":"1970-02-02"}, {"a":"33","c":"1970-02-03"}, {"a":"34","c":"1970-02-04"}, {"a":["0"],"b":"35"}, {"a":["0","1"],"b":"36"}, {"a":["0","1","2"],"b":"37"}, {"a":["0","1","2","3"],"b":"38"}, {"a":["0","1","2","3","4"],"b":"39"}] +} +{ + "json.non.existing.path": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.non.existing.path.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.non.existing.path": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.non.existing.path.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.non.existing.path": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.non.existing.path.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.non.existing.path": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.non.existing.path.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.a.b.c": [0, 0, 0, 0, 0, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.a.b.c": [0, 0, 0, 0, 0, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +} +{ + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"] +} +{ + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]] +} +{ + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"] +} +{ + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"] +} +{ + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.b.b._26": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.b.b._26": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.a.b.c": [0, 0, 0, 0, 0, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.a.b.c": [0, 0, 0, 0, 0, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +} +{ + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"] +} +{ + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +('a.b.c','UInt32') +('a.b.d','Array(Nullable(String))') +('a.b.d','DateTime') +('a.b.d','Int64') +('a.b.e','String') +('b.b._25','Int64') +('b.b._26','Int64') +('b.b._27','Int64') +('b.b._28','Int64') +('b.b._29','Int64') +('b.b.d','Int64') +('b.b.e','String') +('d.a','Array(Nullable(Int64))') +('d.a','Int64') +('d.b','Int64') +('d.c','Date') +{ + "json.non.existing.path": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.c": [0, 0, 0, 0, 0, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.e": [null, null, null, null, null, null, null, null, null, null, "str_10", "str_11", "str_12", "str_13", "str_14", null, null, null, null, null, "str_20", "str_21", "str_22", "str_23", "str_24", "str_25", "str_26", "str_27", "str_28", "str_29", "str_30", "str_31", "str_32", "str_33", "str_34", "str_35", "str_36", "str_37", "str_38", "str_39"], + "json.a.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, "str_10", "str_11", "str_12", "str_13", "str_14", null, null, null, null, null, "str_20", "str_21", "str_22", "str_23", "str_24", "str_25", "str_26", "str_27", "str_28", "str_29", "str_30", "str_31", "str_32", "str_33", "str_34", "str_35", "str_36", "str_37", "str_38", "str_39"], + "json.a.b.e.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._25": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "25", null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._25.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "25", null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._25.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._27": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "27", null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._27.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "27", null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._27.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._28": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "28", null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._28.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "28", null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._28.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._29": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "29", null, null, null, null, null, null, null, null, null, null], + "json.b.b._29.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "29", null, null, null, null, null, null, null, null, null, null], + "json.b.b._29.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.d": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "15", "16", "17", "18", "19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "15", "16", "17", "18", "19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.d.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.c": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "1970-01-31", "1970-02-01", "1970-02-02", "1970-02-03", "1970-02-04", null, null, null, null, null], + "json.d.c.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "1970-01-31", "1970-02-01", "1970-02-02", "1970-02-03", "1970-02-04", null, null, null, null, null], + "json.d.c.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.^`n`": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}], + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.^`a`.b": [{"c":0}, {"c":0}, {"c":0}, {"c":0}, {"c":0}, {"c":5}, {"c":6}, {"c":7}, {"c":8}, {"c":9}, {"c":0,"d":"10","e":"str_10"}, {"c":0,"d":"11","e":"str_11"}, {"c":0,"d":"12","e":"str_12"}, {"c":0,"d":"13","e":"str_13"}, {"c":0,"d":"14","e":"str_14"}, {"c":0}, {"c":0}, {"c":0}, {"c":0}, {"c":0}, {"c":20,"d":"20","e":"str_20"}, {"c":21,"d":"21","e":"str_21"}, {"c":22,"d":"22","e":"str_22"}, {"c":23,"d":"23","e":"str_23"}, {"c":24,"d":"24","e":"str_24"}, {"c":25,"d":"25","e":"str_25"}, {"c":26,"d":"26","e":"str_26"}, {"c":27,"d":"27","e":"str_27"}, {"c":28,"d":"28","e":"str_28"}, {"c":29,"d":"29","e":"str_29"}, {"c":30,"d":[],"e":"str_30"}, {"c":31,"d":[],"e":"str_31"}, {"c":32,"d":[],"e":"str_32"}, {"c":33,"d":[],"e":"str_33"}, {"c":34,"d":[],"e":"str_34"}, {"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}, {"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}, {"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}, {"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}, {"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}], + "json.^`b`": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {"b":{"d":"15","e":"str_15"}}, {"b":{"d":"16","e":"str_16"}}, {"b":{"d":"17","e":"str_17"}}, {"b":{"d":"18","e":"str_18"}}, {"b":{"d":"19","e":"str_19"}}, {}, {}, {}, {}, {}, {"b":{"_25":"25"}}, {"b":{"_26":"26"}}, {"b":{"_27":"27"}}, {"b":{"_28":"28"}}, {"b":{"_29":"29"}}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}], + "json.^`d`": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {"a":"30","c":"1970-01-31"}, {"a":"31","c":"1970-02-01"}, {"a":"32","c":"1970-02-02"}, {"a":"33","c":"1970-02-03"}, {"a":"34","c":"1970-02-04"}, {"a":["0"],"b":"35"}, {"a":["0","1"],"b":"36"}, {"a":["0","1","2"],"b":"37"}, {"a":["0","1","2","3"],"b":"38"}, {"a":["0","1","2","3","4"],"b":"39"}] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.non.existing.path": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.c": [0, 0, 0, 0, 0, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.e": [null, null, null, null, null, null, null, null, null, null, "str_10", "str_11", "str_12", "str_13", "str_14", null, null, null, null, null, "str_20", "str_21", "str_22", "str_23", "str_24", "str_25", "str_26", "str_27", "str_28", "str_29", "str_30", "str_31", "str_32", "str_33", "str_34", "str_35", "str_36", "str_37", "str_38", "str_39"], + "json.a.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, "str_10", "str_11", "str_12", "str_13", "str_14", null, null, null, null, null, "str_20", "str_21", "str_22", "str_23", "str_24", "str_25", "str_26", "str_27", "str_28", "str_29", "str_30", "str_31", "str_32", "str_33", "str_34", "str_35", "str_36", "str_37", "str_38", "str_39"], + "json.a.b.e.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._25": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "25", null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._25.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "25", null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._25.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._27": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "27", null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._27.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "27", null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._27.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._28": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "28", null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._28.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "28", null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._28.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._29": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "29", null, null, null, null, null, null, null, null, null, null], + "json.b.b._29.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "29", null, null, null, null, null, null, null, null, null, null], + "json.b.b._29.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.d": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "15", "16", "17", "18", "19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "15", "16", "17", "18", "19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.d.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.c": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "1970-01-31", "1970-02-01", "1970-02-02", "1970-02-03", "1970-02-04", null, null, null, null, null], + "json.d.c.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "1970-01-31", "1970-02-01", "1970-02-02", "1970-02-03", "1970-02-04", null, null, null, null, null], + "json.d.c.:`UUID`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.^`n`": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}], + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.^`a`.b": [{"c":0}, {"c":0}, {"c":0}, {"c":0}, {"c":0}, {"c":5}, {"c":6}, {"c":7}, {"c":8}, {"c":9}, {"c":0,"d":"10","e":"str_10"}, {"c":0,"d":"11","e":"str_11"}, {"c":0,"d":"12","e":"str_12"}, {"c":0,"d":"13","e":"str_13"}, {"c":0,"d":"14","e":"str_14"}, {"c":0}, {"c":0}, {"c":0}, {"c":0}, {"c":0}, {"c":20,"d":"20","e":"str_20"}, {"c":21,"d":"21","e":"str_21"}, {"c":22,"d":"22","e":"str_22"}, {"c":23,"d":"23","e":"str_23"}, {"c":24,"d":"24","e":"str_24"}, {"c":25,"d":"25","e":"str_25"}, {"c":26,"d":"26","e":"str_26"}, {"c":27,"d":"27","e":"str_27"}, {"c":28,"d":"28","e":"str_28"}, {"c":29,"d":"29","e":"str_29"}, {"c":30,"d":[],"e":"str_30"}, {"c":31,"d":[],"e":"str_31"}, {"c":32,"d":[],"e":"str_32"}, {"c":33,"d":[],"e":"str_33"}, {"c":34,"d":[],"e":"str_34"}, {"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}, {"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}, {"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}, {"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}, {"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}], + "json.^`b`": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {"b":{"d":"15","e":"str_15"}}, {"b":{"d":"16","e":"str_16"}}, {"b":{"d":"17","e":"str_17"}}, {"b":{"d":"18","e":"str_18"}}, {"b":{"d":"19","e":"str_19"}}, {}, {}, {}, {}, {}, {"b":{"_25":"25"}}, {"b":{"_26":"26"}}, {"b":{"_27":"27"}}, {"b":{"_28":"28"}}, {"b":{"_29":"29"}}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}], + "json.^`d`": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {"a":"30","c":"1970-01-31"}, {"a":"31","c":"1970-02-01"}, {"a":"32","c":"1970-02-02"}, {"a":"33","c":"1970-02-03"}, {"a":"34","c":"1970-02-04"}, {"a":["0"],"b":"35"}, {"a":["0","1"],"b":"36"}, {"a":["0","1","2"],"b":"37"}, {"a":["0","1","2","3"],"b":"38"}, {"a":["0","1","2","3","4"],"b":"39"}] +} +{ + "json.non.existing.path": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.non.existing.path.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.non.existing.path": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.non.existing.path.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.non.existing.path": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.non.existing.path.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.non.existing.path": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.non.existing.path.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.a.b.c": [0, 0, 0, 0, 0, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.a.b.c": [0, 0, 0, 0, 0, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +} +{ + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"] +} +{ + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]] +} +{ + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"] +} +{ + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.b.b.e": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`String`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "str_15", "str_16", "str_17", "str_18", "str_19", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b.e.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"] +} +{ + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.d.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "35", "36", "37", "38", "39"], + "json.d.b.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.b.b._26": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.b.b._26": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.d.a": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "30", "31", "32", "33", "34", ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"]], + "json.d.a.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Int64`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "26", null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], + "json.b.b._26.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.a.b.c": [0, 0, 0, 0, 0, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.a.b.c": [0, 0, 0, 0, 0, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +} +{ + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"] +} +{ + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} +{ + "json": [{"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":0}}}, {"a":{"b":{"c":5}}}, {"a":{"b":{"c":6}}}, {"a":{"b":{"c":7}}}, {"a":{"b":{"c":8}}}, {"a":{"b":{"c":9}}}, {"a":{"b":{"c":0,"d":"10","e":"str_10"}}}, {"a":{"b":{"c":0,"d":"11","e":"str_11"}}}, {"a":{"b":{"c":0,"d":"12","e":"str_12"}}}, {"a":{"b":{"c":0,"d":"13","e":"str_13"}}}, {"a":{"b":{"c":0,"d":"14","e":"str_14"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"15","e":"str_15"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"16","e":"str_16"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"17","e":"str_17"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"18","e":"str_18"}}}, {"a":{"b":{"c":0}},"b":{"b":{"d":"19","e":"str_19"}}}, {"a":{"b":{"c":20,"d":"20","e":"str_20"}}}, {"a":{"b":{"c":21,"d":"21","e":"str_21"}}}, {"a":{"b":{"c":22,"d":"22","e":"str_22"}}}, {"a":{"b":{"c":23,"d":"23","e":"str_23"}}}, {"a":{"b":{"c":24,"d":"24","e":"str_24"}}}, {"a":{"b":{"c":25,"d":"25","e":"str_25"}},"b":{"b":{"_25":"25"}}}, {"a":{"b":{"c":26,"d":"26","e":"str_26"}},"b":{"b":{"_26":"26"}}}, {"a":{"b":{"c":27,"d":"27","e":"str_27"}},"b":{"b":{"_27":"27"}}}, {"a":{"b":{"c":28,"d":"28","e":"str_28"}},"b":{"b":{"_28":"28"}}}, {"a":{"b":{"c":29,"d":"29","e":"str_29"}},"b":{"b":{"_29":"29"}}}, {"a":{"b":{"c":30,"d":[],"e":"str_30"}},"d":{"a":"30","c":"1970-01-31"}}, {"a":{"b":{"c":31,"d":[],"e":"str_31"}},"d":{"a":"31","c":"1970-02-01"}}, {"a":{"b":{"c":32,"d":[],"e":"str_32"}},"d":{"a":"32","c":"1970-02-02"}}, {"a":{"b":{"c":33,"d":[],"e":"str_33"}},"d":{"a":"33","c":"1970-02-03"}}, {"a":{"b":{"c":34,"d":[],"e":"str_34"}},"d":{"a":"34","c":"1970-02-04"}}, {"a":{"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}},"d":{"a":["0"],"b":"35"}}, {"a":{"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}},"d":{"a":["0","1"],"b":"36"}}, {"a":{"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}},"d":{"a":["0","1","2"],"b":"37"}}, {"a":{"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}},"d":{"a":["0","1","2","3"],"b":"38"}}, {"a":{"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}},"d":{"a":["0","1","2","3","4"],"b":"39"}}], + "json.^`a`": [{"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":5}}, {"b":{"c":6}}, {"b":{"c":7}}, {"b":{"c":8}}, {"b":{"c":9}}, {"b":{"c":0,"d":"10","e":"str_10"}}, {"b":{"c":0,"d":"11","e":"str_11"}}, {"b":{"c":0,"d":"12","e":"str_12"}}, {"b":{"c":0,"d":"13","e":"str_13"}}, {"b":{"c":0,"d":"14","e":"str_14"}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":0}}, {"b":{"c":20,"d":"20","e":"str_20"}}, {"b":{"c":21,"d":"21","e":"str_21"}}, {"b":{"c":22,"d":"22","e":"str_22"}}, {"b":{"c":23,"d":"23","e":"str_23"}}, {"b":{"c":24,"d":"24","e":"str_24"}}, {"b":{"c":25,"d":"25","e":"str_25"}}, {"b":{"c":26,"d":"26","e":"str_26"}}, {"b":{"c":27,"d":"27","e":"str_27"}}, {"b":{"c":28,"d":"28","e":"str_28"}}, {"b":{"c":29,"d":"29","e":"str_29"}}, {"b":{"c":30,"d":[],"e":"str_30"}}, {"b":{"c":31,"d":[],"e":"str_31"}}, {"b":{"c":32,"d":[],"e":"str_32"}}, {"b":{"c":33,"d":[],"e":"str_33"}}, {"b":{"c":34,"d":[],"e":"str_34"}}, {"b":{"c":35,"d":"1970-01-01 00:00:35","e":"str_35"}}, {"b":{"c":36,"d":"1970-01-01 00:00:36","e":"str_36"}}, {"b":{"c":37,"d":"1970-01-01 00:00:37","e":"str_37"}}, {"b":{"c":38,"d":"1970-01-01 00:00:38","e":"str_38"}}, {"b":{"c":39,"d":"1970-01-01 00:00:39","e":"str_39"}}], + "json.a.b.d": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", [], [], [], [], [], "1970-01-01 00:00:35", "1970-01-01 00:00:36", "1970-01-01 00:00:37", "1970-01-01 00:00:38", "1970-01-01 00:00:39"], + "json.a.b.d.:`Int64`": [null, null, null, null, null, null, null, null, null, null, "10", "11", "12", "13", "14", null, null, null, null, null, "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", null, null, null, null, null, null, null, null, null, null], + "json.a.b.d.:`Date`": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null] +} diff --git a/tests/queries/0_stateless/03207_json_read_subcolumns_1_wide_merge_tree.sql.j2 b/tests/queries/0_stateless/03207_json_read_subcolumns_1_wide_merge_tree.sql.j2 new file mode 100644 index 00000000000..f571d2417f4 --- /dev/null +++ b/tests/queries/0_stateless/03207_json_read_subcolumns_1_wide_merge_tree.sql.j2 @@ -0,0 +1,93 @@ +-- Tags: no-fasttest + +set allow_experimental_json_type = 1; +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; +set session_timezone = 'UTC'; + +drop table if exists test; +create table test (id UInt64, json JSON(max_dynamic_paths=2, a.b.c UInt32)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1; + +insert into test select number, '{}' from numbers(5); +insert into test select number, toJSONString(map('a.b.c', number)) from numbers(5, 5); +insert into test select number, toJSONString(map('a.b.d', number::UInt32, 'a.b.e', 'str_' || toString(number))) from numbers(10, 5); +insert into test select number, toJSONString(map('b.b.d', number::UInt32, 'b.b.e', 'str_' || toString(number))) from numbers(15, 5); +insert into test select number, toJSONString(map('a.b.c', number, 'a.b.d', number::UInt32, 'a.b.e', 'str_' || toString(number))) from numbers(20, 5); +insert into test select number, toJSONString(map('a.b.c', number, 'a.b.d', number::UInt32, 'a.b.e', 'str_' || toString(number), 'b.b._' || toString(number), number::UInt32)) from numbers(25, 5); +insert into test select number, toJSONString(map('a.b.c', number, 'a.b.d', range(number % + 1)::Array(UInt32), 'a.b.e', 'str_' || toString(number), 'd.a', number::UInt32, 'd.c', toDate(number))) from numbers(30, 5); +insert into test select number, toJSONString(map('a.b.c', number, 'a.b.d', toDateTime(number), 'a.b.e', 'str_' || toString(number), 'd.a', range(number % 5 + 1)::Array(UInt32), 'd.b', number::UInt32)) from numbers(35, 5); + +{% for merge_command in ['system stop merges test', 'system start merges test'] -%} + +{{ merge_command }}; + +select distinct arrayJoin(JSONAllPathsWithTypes(json)) as paths_with_types from test order by paths_with_types; + +select json.non.existing.path, json.a.b.c, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:UUID, json.a.b.e, json.a.b.e.:String, json.a.b.e.:UUID, json.b.b.`_25`, json.b.b.`_25`.:Int64, json.b.b.`_25`.:UUID, json.b.b.`_26`, json.b.b.`_26`.:Int64, json.b.b.`_26`.:UUID, json.b.b.`_27`, json.b.b.`_27`.:Int64, json.b.b.`_27`.:UUID, json.b.b.`_28`, json.b.b.`_28`.:Int64, json.b.b.`_28`.:UUID, json.b.b.`_29`, json.b.b.`_29`.:Int64, json.b.b.`_29`.:UUID, json.b.b.d, json.b.b.d.:Int64, json.b.b.d.:UUID, json.b.b.e, json.b.b.e.:String, json.b.b.e.:UUID, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:UUID, json.d.b, json.d.b.:Int64, json.d.b.:UUID, json.d.c, json.d.c.:Date, json.d.c.:UUID, json.^n, json.^a, json.^a.b, json.^b, json.^d from test order by id format JSONColumns; +select json, json.non.existing.path, json.a.b.c, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:UUID, json.a.b.e, json.a.b.e.:String, json.a.b.e.:UUID, json.b.b.`_25`, json.b.b.`_25`.:Int64, json.b.b.`_25`.:UUID, json.b.b.`_26`, json.b.b.`_26`.:Int64, json.b.b.`_26`.:UUID, json.b.b.`_27`, json.b.b.`_27`.:Int64, json.b.b.`_27`.:UUID, json.b.b.`_28`, json.b.b.`_28`.:Int64, json.b.b.`_28`.:UUID, json.b.b.`_29`, json.b.b.`_29`.:Int64, json.b.b.`_29`.:UUID, json.b.b.d, json.b.b.d.:Int64, json.b.b.d.:UUID, json.b.b.e, json.b.b.e.:String, json.b.b.e.:UUID, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:UUID, json.d.b, json.d.b.:Int64, json.d.b.:UUID, json.d.c, json.d.c.:Date, json.d.c.:UUID, json.^n, json.^a, json.^a.b, json.^b, json.^d from test order by id format JSONColumns; + +select json.non.existing.path from test order by id format JSONColumns; +select json.non.existing.path.:Int64 from test order by id format JSONColumns; +select json.non.existing.path, json.non.existing.path.:Int64 from test order by id format JSONColumns; +select json, json.non.existing.path from test order by id format JSONColumns; +select json, json.non.existing.path.:Int64 from test order by id format JSONColumns; +select json, json.non.existing.path, json.non.existing.path.:Int64 from test order by id format JSONColumns; + +select json.a.b.c from test order by id format JSONColumns; +select json, json.a.b.c from test order by id format JSONColumns; + +select json.b.b.e from test order by id format JSONColumns; +select json.b.b.e.:String, json.b.b.e.:Date from test order by id format JSONColumns; +select json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date from test order by id format JSONColumns; +select json, json.b.b.e from test order by id format JSONColumns; +select json, json.b.b.e.:String, json.b.b.e.:Date from test order by id format JSONColumns; +select json, json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date from test order by id format JSONColumns; + +select json.b.b.e, json.a.b.d from test order by id format JSONColumns; +select json.b.b.e.:String, json.b.b.e.:Date, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format JSONColumns; +select json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format JSONColumns; +select json, json.b.b.e, json.a.b.d from test order by id format JSONColumns; +select json, json.b.b.e.:String, json.b.b.e.:Date, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format JSONColumns; +select json, json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format JSONColumns; + +select json.b.b.e, json.d.a from test order by id format JSONColumns; +select json.b.b.e.:String, json.b.b.e.:Date, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date from test order by id format JSONColumns; +select json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date from test order by id format JSONColumns; +select json, json.b.b.e, json.d.a from test order by id format JSONColumns; +select json, json.b.b.e.:String, json.b.b.e.:Date, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date from test order by id format JSONColumns; +select json, json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date from test order by id format JSONColumns; + +select json.b.b.e, json.d.a, json.d.b from test order by id format JSONColumns; +select json.b.b.e.:String, json.b.b.e.:Date, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b.:Int64, json.d.b.:Date from test order by id format JSONColumns; +select json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b, json.d.b.:Int64, json.d.b.:Date from test order by id format JSONColumns; +select json, json.b.b.e, json.d.a, json.d.b from test order by id format JSONColumns; +select json, json.b.b.e.:String, json.b.b.e.:Date, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b.:Int64, json.d.b.:Date from test order by id format JSONColumns; +select json, json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b, json.d.b.:Int64, json.d.b.:Date from test order by id format JSONColumns; + +select json.d.a, json.d.b from test order by id format JSONColumns; +select json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b.:Int64, json.d.b.:Date from test order by id format JSONColumns; +select json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b, json.d.b.:Int64, json.d.b.:Date from test order by id format JSONColumns; +select json, json.d.a, json.d.b from test order by id format JSONColumns; +select json, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b.:Int64, json.d.b.:Date from test order by id format JSONColumns; +select json, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b, json.d.b.:Int64, json.d.b.:Date from test order by id format JSONColumns; + +select json.d.a, json.b.b.`_26` from test order by id format JSONColumns; +select json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.b.b.`_26`.:Int64, json.b.b.`_26`.:Date from test order by id format JSONColumns; +select json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.b.b.`_26`.:Int64, json.b.b, json.b.b.`_26`.:Date from test order by id format JSONColumns; +select json, json.d.a, json.b.b.`_26` from test order by id format JSONColumns; +select json, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.b.b.`_26`.:Int64, json.b.b.`_26`.:Date from test order by id format JSONColumns; +select json, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.b.b.`_26`.:Int64, json.b.b, json.b.b.`_26`.:Date from test order by id format JSONColumns; + +select json.^a, json.a.b.c from test order by id format JSONColumns; +select json, json.^a, json.a.b.c from test order by id format JSONColumns; + +select json.^a, json.a.b.d from test order by id format JSONColumns; +select json.^a, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format JSONColumns; +select json.^a, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format JSONColumns; +select json, json.^a, json.a.b.d from test order by id format JSONColumns; +select json, json.^a, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format JSONColumns; +select json, json.^a, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format JSONColumns; + +{% endfor -%} + +drop table test; \ No newline at end of file diff --git a/tests/queries/0_stateless/03207_json_read_subcolumns_2_compact_merge_tree.reference.j2 b/tests/queries/0_stateless/03207_json_read_subcolumns_2_compact_merge_tree.reference.j2 new file mode 100644 index 00000000000..e1e69879cfb --- /dev/null +++ b/tests/queries/0_stateless/03207_json_read_subcolumns_2_compact_merge_tree.reference.j2 @@ -0,0 +1,66 @@ +('a.b.c','UInt32') +('a.b.d','Array(Nullable(String))') +('a.b.d','DateTime') +('a.b.d','Int64') +('a.b.e','String') +('b.b._0','Int64') +('b.b._1','Int64') +('b.b._2','Int64') +('b.b._3','Int64') +('b.b._4','Int64') +('b.b.d','Int64') +('b.b.e','String') +('d.a','Array(Nullable(Int64))') +('d.a','Int64') +('d.b','Int64') +('d.c','Date') +800000 +800000 +300000 +700000 +700000 +200000 +400000 +500000 +600000 +500000 +600000 +600000 +700000 +580000 +680000 +0 +0 +('a.b.c','UInt32') +('a.b.d','Array(Nullable(String))') +('a.b.d','DateTime') +('a.b.d','Int64') +('a.b.e','String') +('b.b._0','Int64') +('b.b._1','Int64') +('b.b._2','Int64') +('b.b._3','Int64') +('b.b._4','Int64') +('b.b.d','Int64') +('b.b.e','String') +('d.a','Array(Nullable(Int64))') +('d.a','Int64') +('d.b','Int64') +('d.c','Date') +800000 +800000 +300000 +700000 +700000 +200000 +400000 +500000 +600000 +500000 +600000 +600000 +700000 +580000 +680000 +0 +0 diff --git a/tests/queries/0_stateless/03207_json_read_subcolumns_2_compact_merge_tree.sql.j2 b/tests/queries/0_stateless/03207_json_read_subcolumns_2_compact_merge_tree.sql.j2 new file mode 100644 index 00000000000..6c33044b5d8 --- /dev/null +++ b/tests/queries/0_stateless/03207_json_read_subcolumns_2_compact_merge_tree.sql.j2 @@ -0,0 +1,128 @@ +-- Tags: no-fasttest, long, no-debug, no-tsan, no-asan, no-msan, no-ubsan + +set allow_experimental_json_type = 1; +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; +set session_timezone = 'UTC'; + +drop table if exists test; +create table test (id UInt64, json JSON(max_dynamic_paths=2, a.b.c UInt32)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000; + +insert into test select number, '{}' from numbers(100000); +insert into test select number, toJSONString(map('a.b.c', number)) from numbers(100000, 100000); +insert into test select number, toJSONString(map('a.b.d', number::UInt32, 'a.b.e', 'str_' || toString(number))) from numbers(200000, 100000); +insert into test select number, toJSONString(map('b.b.d', number::UInt32, 'b.b.e', 'str_' || toString(number))) from numbers(300000, 100000); +insert into test select number, toJSONString(map('a.b.c', number, 'a.b.d', number::UInt32, 'a.b.e', 'str_' || toString(number))) from numbers(400000, 100000); +insert into test select number, toJSONString(map('a.b.c', number, 'a.b.d', number::UInt32, 'a.b.e', 'str_' || toString(number), 'b.b._' || toString(number % 5), number::UInt32)) from numbers(500000, 100000); +insert into test select number, toJSONString(map('a.b.c', number, 'a.b.d', range(number % + 1)::Array(UInt32), 'a.b.e', 'str_' || toString(number), 'd.a', number::UInt32, 'd.c', toDate(number))) from numbers(600000, 100000); +insert into test select number, toJSONString(map('a.b.c', number, 'a.b.d', toDateTime(number), 'a.b.e', 'str_' || toString(number), 'd.a', range(number % 5 + 1)::Array(UInt32), 'd.b', number::UInt32)) from numbers(700000, 100000); + +{% for merge_command in ['system stop merges test', 'system start merges test'] -%} + +{{ merge_command }}; + +select distinct arrayJoin(JSONAllPathsWithTypes(json)) as paths_with_types from test order by paths_with_types; + +select json.non.existing.path, json.a.b.c, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:UUID, json.a.b.e, json.a.b.e.:String, json.a.b.e.:UUID, json.b.b.`_0`, json.b.b.`_0`.:Int64, json.b.b.`_0`.:UUID, json.b.b.`_1`, json.b.b.`_1`.:Int64, json.b.b.`_1`.:UUID, json.b.b.`_2`, json.b.b.`_2`.:Int64, json.b.b.`_2`.:UUID, json.b.b.`_3`, json.b.b.`_3`.:Int64, json.b.b.`_3`.:UUID, json.b.b.`_4`, json.b.b.`_4`.:Int64, json.b.b.`_4`.:UUID, json.b.b.d, json.b.b.d.:Int64, json.b.b.d.:UUID, json.b.b.e, json.b.b.e.:String, json.b.b.e.:UUID, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:UUID, json.d.b, json.d.b.:Int64, json.d.b.:UUID, json.d.c, json.d.c.:Date, json.d.c.:UUID, json.^n, json.^a, json.^a.b, json.^b, json.^d from test format Null; +select json.non.existing.path, json.a.b.c, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:UUID, json.a.b.e, json.a.b.e.:String, json.a.b.e.:UUID, json.b.b.`_0`, json.b.b.`_0`.:Int64, json.b.b.`_0`.:UUID, json.b.b.`_1`, json.b.b.`_1`.:Int64, json.b.b.`_1`.:UUID, json.b.b.`_2`, json.b.b.`_2`.:Int64, json.b.b.`_2`.:UUID, json.b.b.`_3`, json.b.b.`_3`.:Int64, json.b.b.`_3`.:UUID, json.b.b.`_4`, json.b.b.`_4`.:Int64, json.b.b.`_4`.:UUID, json.b.b.d, json.b.b.d.:Int64, json.b.b.d.:UUID, json.b.b.e, json.b.b.e.:String, json.b.b.e.:UUID, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:UUID, json.d.b, json.d.b.:Int64, json.d.b.:UUID, json.d.c, json.d.c.:Date, json.d.c.:UUID, json.^n, json.^a, json.^a.b, json.^b, json.^d from test order by id format Null; +select json, json.non.existing.path, json.a.b.c, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:UUID, json.a.b.e, json.a.b.e.:String, json.a.b.e.:UUID, json.b.b.`_0`, json.b.b.`_0`.:Int64, json.b.b.`_0`.:UUID, json.b.b.`_1`, json.b.b.`_1`.:Int64, json.b.b.`_1`.:UUID, json.b.b.`_2`, json.b.b.`_2`.:Int64, json.b.b.`_2`.:UUID, json.b.b.`_3`, json.b.b.`_3`.:Int64, json.b.b.`_3`.:UUID, json.b.b.`_4`, json.b.b.`_4`.:Int64, json.b.b.`_4`.:UUID, json.b.b.d, json.b.b.d.:Int64, json.b.b.d.:UUID, json.b.b.e, json.b.b.e.:String, json.b.b.e.:UUID, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:UUID, json.d.b, json.d.b.:Int64, json.d.b.:UUID, json.d.c, json.d.c.:Date, json.d.c.:UUID, json.^n, json.^a, json.^a.b, json.^b, json.^d from test format Null; +select json, json.non.existing.path, json.a.b.c, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:UUID, json.a.b.e, json.a.b.e.:String, json.a.b.e.:UUID, json.b.b.`_0`, json.b.b.`_0`.:Int64, json.b.b.`_0`.:UUID, json.b.b.`_1`, json.b.b.`_1`.:Int64, json.b.b.`_1`.:UUID, json.b.b.`_2`, json.b.b.`_2`.:Int64, json.b.b.`_2`.:UUID, json.b.b.`_3`, json.b.b.`_3`.:Int64, json.b.b.`_3`.:UUID, json.b.b.`_4`, json.b.b.`_4`.:Int64, json.b.b.`_4`.:UUID, json.b.b.d, json.b.b.d.:Int64, json.b.b.d.:UUID, json.b.b.e, json.b.b.e.:String, json.b.b.e.:UUID, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:UUID, json.d.b, json.d.b.:Int64, json.d.b.:UUID, json.d.c, json.d.c.:Date, json.d.c.:UUID, json.^n, json.^a, json.^a.b, json.^b, json.^d from test order by id format Null; + +select count() from test where json.non.existing.path is Null; +select count() from test where json.non.existing.path.:String is Null; +select json.non.existing.path from test order by id format Null; +select json.non.existing.path.:Int64 from test order by id format Null; +select json.non.existing.path, json.non.existing.path.:Int64 from test order by id format Null; +select json, json.non.existing.path from test order by id format Null; +select json, json.non.existing.path.:Int64 from test order by id format Null; +select json, json.non.existing.path, json.non.existing.path.:Int64 from test format Null; +select json, json.non.existing.path, json.non.existing.path.:Int64 from test order by id format Null; + +select count() from test where json.a.b.c == 0; +select json.a.b.c from test format Null; +select json.a.b.c from test order by id format Null; +select json, json.a.b.c from test format Null; +select json, json.a.b.c from test order by id format Null; + +select count() from test where json.b.b.e is Null; +select count() from test where json.b.b.e.:String is Null; +select json.b.b.e from test format Null; +select json.b.b.e from test order by id format Null; +select json.b.b.e.:String, json.b.b.e.:Date from test format Null; +select json.b.b.e.:String, json.b.b.e.:Date from test order by id format Null; +select json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date from test format Null; +select json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date from test order by id format Null; +select json, json.b.b.e from test format Null; +select json, json.b.b.e from test order by id format Null; +select json, json.b.b.e.:String, json.b.b.e.:Date from test format Null; +select json, json.b.b.e.:String, json.b.b.e.:Date from test order by id format Null; +select json, json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date from test format Null; +select json, json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date from test order by id format Null; + +select count() from test where json.b.b.e is Null and json.a.b.d is Null ; +select count() from test where json.b.b.e.:String is Null and json.a.b.d.:Int64 is Null; +select json.b.b.e, json.a.b.d from test order by id format Null; +select json.b.b.e.:String, json.b.b.e.:Date, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format Null; +select json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format Null; +select json, json.b.b.e, json.a.b.d from test order by id format Null; +select json, json.b.b.e.:String, json.b.b.e.:Date, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format Null; +select json, json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:Date from test format Null; +select json, json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format Null; + +select count() from test where json.b.b.e is Null and json.d.a is Null; +select count() from test where json.b.b.e.:String is Null and empty(json.d.a.:`Array(Nullable(Int64))`); +select json.b.b.e, json.d.a from test order by id format Null; +select json.b.b.e.:String, json.b.b.e.:Date, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date from test order by id format Null; +select json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date from test order by id format Null; +select json, json.b.b.e, json.d.a from test order by id format Null; +select json, json.b.b.e.:String, json.b.b.e.:Date, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date from test order by id format Null; +select json, json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date from test format Null; +select json, json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date from test order by id format Null; + +select count() from test where json.b.b.e is Null and json.d.a is Null and json.d.b is Null; +select count() from test where json.b.b.e.:String is Null and empty(json.d.a.:`Array(Nullable(Int64))`) and json.d.b.:Int64 is Null; +select json.b.b.e, json.d.a, json.d.b from test order by id format Null; +select json.b.b.e.:String, json.b.b.e.:Date, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b.:Int64, json.d.b.:Date from test order by id format Null; +select json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b, json.d.b.:Int64, json.d.b.:Date from test order by id format Null; +select json, json.b.b.e, json.d.a, json.d.b from test order by id format Null; +select json, json.b.b.e.:String, json.b.b.e.:Date, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b.:Int64, json.d.b.:Date from test order by id format Null; +select json, json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b, json.d.b.:Int64, json.d.b.:Date from test format Null; +select json, json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b, json.d.b.:Int64, json.d.b.:Date from test order by id format Null; + +select count() from test where json.d.a is Null and json.d.b is Null; +select count() from test where empty(json.d.a.:`Array(Nullable(Int64))`) and json.d.b.:Int64 is Null; +select json.d.a, json.d.b from test order by id format Null; +select json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b.:Int64, json.d.b.:Date from test order by id format Null; +select json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b, json.d.b.:Int64, json.d.b.:Date from test order by id format Null; +select json, json.d.a, json.d.b from test order by id format Null; +select json, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b.:Int64, json.d.b.:Date from test order by id format Null; +select json, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b, json.d.b.:Int64, json.d.b.:Date from test format Null; +select json, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b, json.d.b.:Int64, json.d.b.:Date from test order by id format Null; + +select count() from test where json.d.a is Null and json.b.b.`_1` is Null; +select count() from test where empty(json.d.a.:`Array(Nullable(Int64))`) and json.b.b.`_1`.:Int64 is Null; +select json.d.a, json.b.b.`_1` from test order by id format Null; +select json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.b.b.`_1`.:Int64, json.b.b.`_1`.:Date from test order by id format Null; +select json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.b.b.`_1`.:Int64, json.b.b, json.b.b.`_1`.:Date from test order by id format Null; +select json, json.d.a, json.b.b.`_1` from test order by id format Null; +select json, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.b.b.`_1`.:Int64, json.b.b.`_1`.:Date from test order by id format Null; +select json, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.b.b.`_1`.:Int64, json.b.b, json.b.b.`_1`.:Date from test format Null; +select json, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.b.b.`_1`.:Int64, json.b.b, json.b.b.`_1`.:Date from test order by id format Null; + +select count() from test where empty(json.^a) and json.a.b.c == 0; +select json.^a, json.a.b.c from test order by id format Null; +select json, json.^a, json.a.b.c from test format Null; +select json, json.^a, json.a.b.c from test order by id format Null; + +select count() from test where empty(json.^a) and json.a.b.d is Null; +select json.^a, json.a.b.d from test order by id format Null; +select json.^a, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format Null; +select json.^a, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format Null; +select json, json.^a, json.a.b.d from test order by id format Null; +select json, json.^a, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format Null; +select json, json.^a, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:Date from test format Null; +select json, json.^a, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format Null; + +{% endfor -%} + +drop table test; \ No newline at end of file diff --git a/tests/queries/0_stateless/03207_json_read_subcolumns_2_memory.reference b/tests/queries/0_stateless/03207_json_read_subcolumns_2_memory.reference new file mode 100644 index 00000000000..1ef53fb5716 --- /dev/null +++ b/tests/queries/0_stateless/03207_json_read_subcolumns_2_memory.reference @@ -0,0 +1,33 @@ +('a.b.c','UInt32') +('a.b.d','Array(Nullable(String))') +('a.b.d','DateTime') +('a.b.d','Int64') +('a.b.e','String') +('b.b._0','Int64') +('b.b._1','Int64') +('b.b._2','Int64') +('b.b._3','Int64') +('b.b._4','Int64') +('b.b.d','Int64') +('b.b.e','String') +('d.a','Array(Nullable(Int64))') +('d.a','Int64') +('d.b','Int64') +('d.c','Date') +800000 +800000 +300000 +700000 +700000 +200000 +400000 +500000 +600000 +500000 +600000 +600000 +700000 +580000 +680000 +0 +0 diff --git a/tests/queries/0_stateless/03207_json_read_subcolumns_2_memory.sql b/tests/queries/0_stateless/03207_json_read_subcolumns_2_memory.sql new file mode 100644 index 00000000000..cc646987c80 --- /dev/null +++ b/tests/queries/0_stateless/03207_json_read_subcolumns_2_memory.sql @@ -0,0 +1,123 @@ +-- Tags: no-fasttest, long, no-debug, no-tsan, no-asan, no-msan, no-ubsan + +set allow_experimental_json_type = 1; +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; +set session_timezone = 'UTC'; + +drop table if exists test; +create table test (id UInt64, json JSON(max_dynamic_paths=2, a.b.c UInt32)) engine=Memory; + +truncate table test; +insert into test select number, '{}' from numbers(100000); +insert into test select number, toJSONString(map('a.b.c', number)) from numbers(100000, 100000); +insert into test select number, toJSONString(map('a.b.d', number::UInt32, 'a.b.e', 'str_' || toString(number))) from numbers(200000, 100000); +insert into test select number, toJSONString(map('b.b.d', number::UInt32, 'b.b.e', 'str_' || toString(number))) from numbers(300000, 100000); +insert into test select number, toJSONString(map('a.b.c', number, 'a.b.d', number::UInt32, 'a.b.e', 'str_' || toString(number))) from numbers(400000, 100000); +insert into test select number, toJSONString(map('a.b.c', number, 'a.b.d', number::UInt32, 'a.b.e', 'str_' || toString(number), 'b.b._' || toString(number % 5), number::UInt32)) from numbers(500000, 100000); +insert into test select number, toJSONString(map('a.b.c', number, 'a.b.d', range(number % + 1)::Array(UInt32), 'a.b.e', 'str_' || toString(number), 'd.a', number::UInt32, 'd.c', toDate(number))) from numbers(600000, 100000); +insert into test select number, toJSONString(map('a.b.c', number, 'a.b.d', toDateTime(number), 'a.b.e', 'str_' || toString(number), 'd.a', range(number % 5 + 1)::Array(UInt32), 'd.b', number::UInt32)) from numbers(700000, 100000); + +select distinct arrayJoin(JSONAllPathsWithTypes(json)) as paths_with_types from test order by paths_with_types; + +select json.non.existing.path, json.a.b.c, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:UUID, json.a.b.e, json.a.b.e.:String, json.a.b.e.:UUID, json.b.b.`_0`, json.b.b.`_0`.:Int64, json.b.b.`_0`.:UUID, json.b.b.`_1`, json.b.b.`_1`.:Int64, json.b.b.`_1`.:UUID, json.b.b.`_2`, json.b.b.`_2`.:Int64, json.b.b.`_2`.:UUID, json.b.b.`_3`, json.b.b.`_3`.:Int64, json.b.b.`_3`.:UUID, json.b.b.`_4`, json.b.b.`_4`.:Int64, json.b.b.`_4`.:UUID, json.b.b.d, json.b.b.d.:Int64, json.b.b.d.:UUID, json.b.b.e, json.b.b.e.:String, json.b.b.e.:UUID, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:UUID, json.d.b, json.d.b.:Int64, json.d.b.:UUID, json.d.c, json.d.c.:Date, json.d.c.:UUID, json.^n, json.^a, json.^a.b, json.^b, json.^d from test format Null; +select json.non.existing.path, json.a.b.c, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:UUID, json.a.b.e, json.a.b.e.:String, json.a.b.e.:UUID, json.b.b.`_0`, json.b.b.`_0`.:Int64, json.b.b.`_0`.:UUID, json.b.b.`_1`, json.b.b.`_1`.:Int64, json.b.b.`_1`.:UUID, json.b.b.`_2`, json.b.b.`_2`.:Int64, json.b.b.`_2`.:UUID, json.b.b.`_3`, json.b.b.`_3`.:Int64, json.b.b.`_3`.:UUID, json.b.b.`_4`, json.b.b.`_4`.:Int64, json.b.b.`_4`.:UUID, json.b.b.d, json.b.b.d.:Int64, json.b.b.d.:UUID, json.b.b.e, json.b.b.e.:String, json.b.b.e.:UUID, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:UUID, json.d.b, json.d.b.:Int64, json.d.b.:UUID, json.d.c, json.d.c.:Date, json.d.c.:UUID, json.^n, json.^a, json.^a.b, json.^b, json.^d from test order by id format Null; +select json, json.non.existing.path, json.a.b.c, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:UUID, json.a.b.e, json.a.b.e.:String, json.a.b.e.:UUID, json.b.b.`_0`, json.b.b.`_0`.:Int64, json.b.b.`_0`.:UUID, json.b.b.`_1`, json.b.b.`_1`.:Int64, json.b.b.`_1`.:UUID, json.b.b.`_2`, json.b.b.`_2`.:Int64, json.b.b.`_2`.:UUID, json.b.b.`_3`, json.b.b.`_3`.:Int64, json.b.b.`_3`.:UUID, json.b.b.`_4`, json.b.b.`_4`.:Int64, json.b.b.`_4`.:UUID, json.b.b.d, json.b.b.d.:Int64, json.b.b.d.:UUID, json.b.b.e, json.b.b.e.:String, json.b.b.e.:UUID, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:UUID, json.d.b, json.d.b.:Int64, json.d.b.:UUID, json.d.c, json.d.c.:Date, json.d.c.:UUID, json.^n, json.^a, json.^a.b, json.^b, json.^d from test format Null; +select json, json.non.existing.path, json.a.b.c, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:UUID, json.a.b.e, json.a.b.e.:String, json.a.b.e.:UUID, json.b.b.`_0`, json.b.b.`_0`.:Int64, json.b.b.`_0`.:UUID, json.b.b.`_1`, json.b.b.`_1`.:Int64, json.b.b.`_1`.:UUID, json.b.b.`_2`, json.b.b.`_2`.:Int64, json.b.b.`_2`.:UUID, json.b.b.`_3`, json.b.b.`_3`.:Int64, json.b.b.`_3`.:UUID, json.b.b.`_4`, json.b.b.`_4`.:Int64, json.b.b.`_4`.:UUID, json.b.b.d, json.b.b.d.:Int64, json.b.b.d.:UUID, json.b.b.e, json.b.b.e.:String, json.b.b.e.:UUID, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:UUID, json.d.b, json.d.b.:Int64, json.d.b.:UUID, json.d.c, json.d.c.:Date, json.d.c.:UUID, json.^n, json.^a, json.^a.b, json.^b, json.^d from test order by id format Null; + +select count() from test where json.non.existing.path is Null; +select count() from test where json.non.existing.path.:String is Null; +select json.non.existing.path from test order by id format Null; +select json.non.existing.path.:Int64 from test order by id format Null; +select json.non.existing.path, json.non.existing.path.:Int64 from test order by id format Null; +select json, json.non.existing.path from test order by id format Null; +select json, json.non.existing.path.:Int64 from test order by id format Null; +select json, json.non.existing.path, json.non.existing.path.:Int64 from test format Null; +select json, json.non.existing.path, json.non.existing.path.:Int64 from test order by id format Null; + +select count() from test where json.a.b.c == 0; +select json.a.b.c from test format Null; +select json.a.b.c from test order by id format Null; +select json, json.a.b.c from test format Null; +select json, json.a.b.c from test order by id format Null; + +select count() from test where json.b.b.e is Null; +select count() from test where json.b.b.e.:String is Null; +select json.b.b.e from test format Null; +select json.b.b.e from test order by id format Null; +select json.b.b.e.:String, json.b.b.e.:Date from test format Null; +select json.b.b.e.:String, json.b.b.e.:Date from test order by id format Null; +select json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date from test format Null; +select json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date from test order by id format Null; +select json, json.b.b.e from test format Null; +select json, json.b.b.e from test order by id format Null; +select json, json.b.b.e.:String, json.b.b.e.:Date from test format Null; +select json, json.b.b.e.:String, json.b.b.e.:Date from test order by id format Null; +select json, json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date from test format Null; +select json, json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date from test order by id format Null; + +select count() from test where json.b.b.e is Null and json.a.b.d is Null ; +select count() from test where json.b.b.e.:String is Null and json.a.b.d.:Int64 is Null; +select json.b.b.e, json.a.b.d from test order by id format Null; +select json.b.b.e.:String, json.b.b.e.:Date, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format Null; +select json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format Null; +select json, json.b.b.e, json.a.b.d from test order by id format Null; +select json, json.b.b.e.:String, json.b.b.e.:Date, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format Null; +select json, json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:Date from test format Null; +select json, json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format Null; + +select count() from test where json.b.b.e is Null and json.d.a is Null; +select count() from test where json.b.b.e.:String is Null and empty(json.d.a.:`Array(Nullable(Int64))`); +select json.b.b.e, json.d.a from test order by id format Null; +select json.b.b.e.:String, json.b.b.e.:Date, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date from test order by id format Null; +select json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date from test order by id format Null; +select json, json.b.b.e, json.d.a from test order by id format Null; +select json, json.b.b.e.:String, json.b.b.e.:Date, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date from test order by id format Null; +select json, json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date from test format Null; +select json, json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date from test order by id format Null; + +select count() from test where json.b.b.e is Null and json.d.a is Null and json.d.b is Null; +select count() from test where json.b.b.e.:String is Null and empty(json.d.a.:`Array(Nullable(Int64))`) and json.d.b.:Int64 is Null; +select json.b.b.e, json.d.a, json.d.b from test order by id format Null; +select json.b.b.e.:String, json.b.b.e.:Date, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b.:Int64, json.d.b.:Date from test order by id format Null; +select json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b, json.d.b.:Int64, json.d.b.:Date from test order by id format Null; +select json, json.b.b.e, json.d.a, json.d.b from test order by id format Null; +select json, json.b.b.e.:String, json.b.b.e.:Date, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b.:Int64, json.d.b.:Date from test order by id format Null; +select json, json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b, json.d.b.:Int64, json.d.b.:Date from test format Null; +select json, json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b, json.d.b.:Int64, json.d.b.:Date from test order by id format Null; + +select count() from test where json.d.a is Null and json.d.b is Null; +select count() from test where empty(json.d.a.:`Array(Nullable(Int64))`) and json.d.b.:Int64 is Null; +select json.d.a, json.d.b from test order by id format Null; +select json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b.:Int64, json.d.b.:Date from test order by id format Null; +select json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b, json.d.b.:Int64, json.d.b.:Date from test order by id format Null; +select json, json.d.a, json.d.b from test order by id format Null; +select json, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b.:Int64, json.d.b.:Date from test order by id format Null; +select json, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b, json.d.b.:Int64, json.d.b.:Date from test format Null; +select json, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b, json.d.b.:Int64, json.d.b.:Date from test order by id format Null; + +select count() from test where json.d.a is Null and json.b.b.`_1` is Null; +select count() from test where empty(json.d.a.:`Array(Nullable(Int64))`) and json.b.b.`_1`.:Int64 is Null; +select json.d.a, json.b.b.`_1` from test order by id format Null; +select json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.b.b.`_1`.:Int64, json.b.b.`_1`.:Date from test order by id format Null; +select json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.b.b.`_1`.:Int64, json.b.b, json.b.b.`_1`.:Date from test order by id format Null; +select json, json.d.a, json.b.b.`_1` from test order by id format Null; +select json, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.b.b.`_1`.:Int64, json.b.b.`_1`.:Date from test order by id format Null; +select json, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.b.b.`_1`.:Int64, json.b.b, json.b.b.`_1`.:Date from test format Null; +select json, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.b.b.`_1`.:Int64, json.b.b, json.b.b.`_1`.:Date from test order by id format Null; + +select count() from test where empty(json.^a) and json.a.b.c == 0; +select json.^a, json.a.b.c from test order by id format Null; +select json, json.^a, json.a.b.c from test format Null; +select json, json.^a, json.a.b.c from test order by id format Null; + +select count() from test where empty(json.^a) and json.a.b.d is Null; +select json.^a, json.a.b.d from test order by id format Null; +select json.^a, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format Null; +select json.^a, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format Null; +select json, json.^a, json.a.b.d from test order by id format Null; +select json, json.^a, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format Null; +select json, json.^a, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:Date from test format Null; +select json, json.^a, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format Null; + +drop table test; diff --git a/tests/queries/0_stateless/03207_json_read_subcolumns_2_wide_merge_tree.reference.j2 b/tests/queries/0_stateless/03207_json_read_subcolumns_2_wide_merge_tree.reference.j2 new file mode 100644 index 00000000000..e1e69879cfb --- /dev/null +++ b/tests/queries/0_stateless/03207_json_read_subcolumns_2_wide_merge_tree.reference.j2 @@ -0,0 +1,66 @@ +('a.b.c','UInt32') +('a.b.d','Array(Nullable(String))') +('a.b.d','DateTime') +('a.b.d','Int64') +('a.b.e','String') +('b.b._0','Int64') +('b.b._1','Int64') +('b.b._2','Int64') +('b.b._3','Int64') +('b.b._4','Int64') +('b.b.d','Int64') +('b.b.e','String') +('d.a','Array(Nullable(Int64))') +('d.a','Int64') +('d.b','Int64') +('d.c','Date') +800000 +800000 +300000 +700000 +700000 +200000 +400000 +500000 +600000 +500000 +600000 +600000 +700000 +580000 +680000 +0 +0 +('a.b.c','UInt32') +('a.b.d','Array(Nullable(String))') +('a.b.d','DateTime') +('a.b.d','Int64') +('a.b.e','String') +('b.b._0','Int64') +('b.b._1','Int64') +('b.b._2','Int64') +('b.b._3','Int64') +('b.b._4','Int64') +('b.b.d','Int64') +('b.b.e','String') +('d.a','Array(Nullable(Int64))') +('d.a','Int64') +('d.b','Int64') +('d.c','Date') +800000 +800000 +300000 +700000 +700000 +200000 +400000 +500000 +600000 +500000 +600000 +600000 +700000 +580000 +680000 +0 +0 diff --git a/tests/queries/0_stateless/03207_json_read_subcolumns_2_wide_merge_tree.sql.j2 b/tests/queries/0_stateless/03207_json_read_subcolumns_2_wide_merge_tree.sql.j2 new file mode 100644 index 00000000000..ab4e0437c15 --- /dev/null +++ b/tests/queries/0_stateless/03207_json_read_subcolumns_2_wide_merge_tree.sql.j2 @@ -0,0 +1,128 @@ +-- Tags: no-fasttest, long, no-debug, no-tsan, no-asan, no-msan, no-ubsan + +set allow_experimental_json_type = 1; +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; +set session_timezone = 'UTC'; + +drop table if exists test; +create table test (id UInt64, json JSON(max_dynamic_paths=2, a.b.c UInt32)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1; + +insert into test select number, '{}' from numbers(100000); +insert into test select number, toJSONString(map('a.b.c', number)) from numbers(100000, 100000); +insert into test select number, toJSONString(map('a.b.d', number::UInt32, 'a.b.e', 'str_' || toString(number))) from numbers(200000, 100000); +insert into test select number, toJSONString(map('b.b.d', number::UInt32, 'b.b.e', 'str_' || toString(number))) from numbers(300000, 100000); +insert into test select number, toJSONString(map('a.b.c', number, 'a.b.d', number::UInt32, 'a.b.e', 'str_' || toString(number))) from numbers(400000, 100000); +insert into test select number, toJSONString(map('a.b.c', number, 'a.b.d', number::UInt32, 'a.b.e', 'str_' || toString(number), 'b.b._' || toString(number % 5), number::UInt32)) from numbers(500000, 100000); +insert into test select number, toJSONString(map('a.b.c', number, 'a.b.d', range(number % + 1)::Array(UInt32), 'a.b.e', 'str_' || toString(number), 'd.a', number::UInt32, 'd.c', toDate(number))) from numbers(600000, 100000); +insert into test select number, toJSONString(map('a.b.c', number, 'a.b.d', toDateTime(number), 'a.b.e', 'str_' || toString(number), 'd.a', range(number % 5 + 1)::Array(UInt32), 'd.b', number::UInt32)) from numbers(700000, 100000); + +{% for merge_command in ['system stop merges test', 'system start merges test'] -%} + +{{ merge_command }}; + +select distinct arrayJoin(JSONAllPathsWithTypes(json)) as paths_with_types from test order by paths_with_types; + +select json.non.existing.path, json.a.b.c, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:UUID, json.a.b.e, json.a.b.e.:String, json.a.b.e.:UUID, json.b.b.`_0`, json.b.b.`_0`.:Int64, json.b.b.`_0`.:UUID, json.b.b.`_1`, json.b.b.`_1`.:Int64, json.b.b.`_1`.:UUID, json.b.b.`_2`, json.b.b.`_2`.:Int64, json.b.b.`_2`.:UUID, json.b.b.`_3`, json.b.b.`_3`.:Int64, json.b.b.`_3`.:UUID, json.b.b.`_4`, json.b.b.`_4`.:Int64, json.b.b.`_4`.:UUID, json.b.b.d, json.b.b.d.:Int64, json.b.b.d.:UUID, json.b.b.e, json.b.b.e.:String, json.b.b.e.:UUID, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:UUID, json.d.b, json.d.b.:Int64, json.d.b.:UUID, json.d.c, json.d.c.:Date, json.d.c.:UUID, json.^n, json.^a, json.^a.b, json.^b, json.^d from test format Null; +select json.non.existing.path, json.a.b.c, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:UUID, json.a.b.e, json.a.b.e.:String, json.a.b.e.:UUID, json.b.b.`_0`, json.b.b.`_0`.:Int64, json.b.b.`_0`.:UUID, json.b.b.`_1`, json.b.b.`_1`.:Int64, json.b.b.`_1`.:UUID, json.b.b.`_2`, json.b.b.`_2`.:Int64, json.b.b.`_2`.:UUID, json.b.b.`_3`, json.b.b.`_3`.:Int64, json.b.b.`_3`.:UUID, json.b.b.`_4`, json.b.b.`_4`.:Int64, json.b.b.`_4`.:UUID, json.b.b.d, json.b.b.d.:Int64, json.b.b.d.:UUID, json.b.b.e, json.b.b.e.:String, json.b.b.e.:UUID, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:UUID, json.d.b, json.d.b.:Int64, json.d.b.:UUID, json.d.c, json.d.c.:Date, json.d.c.:UUID, json.^n, json.^a, json.^a.b, json.^b, json.^d from test order by id format Null; +select json, json.non.existing.path, json.a.b.c, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:UUID, json.a.b.e, json.a.b.e.:String, json.a.b.e.:UUID, json.b.b.`_0`, json.b.b.`_0`.:Int64, json.b.b.`_0`.:UUID, json.b.b.`_1`, json.b.b.`_1`.:Int64, json.b.b.`_1`.:UUID, json.b.b.`_2`, json.b.b.`_2`.:Int64, json.b.b.`_2`.:UUID, json.b.b.`_3`, json.b.b.`_3`.:Int64, json.b.b.`_3`.:UUID, json.b.b.`_4`, json.b.b.`_4`.:Int64, json.b.b.`_4`.:UUID, json.b.b.d, json.b.b.d.:Int64, json.b.b.d.:UUID, json.b.b.e, json.b.b.e.:String, json.b.b.e.:UUID, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:UUID, json.d.b, json.d.b.:Int64, json.d.b.:UUID, json.d.c, json.d.c.:Date, json.d.c.:UUID, json.^n, json.^a, json.^a.b, json.^b, json.^d from test format Null; +select json, json.non.existing.path, json.a.b.c, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:UUID, json.a.b.e, json.a.b.e.:String, json.a.b.e.:UUID, json.b.b.`_0`, json.b.b.`_0`.:Int64, json.b.b.`_0`.:UUID, json.b.b.`_1`, json.b.b.`_1`.:Int64, json.b.b.`_1`.:UUID, json.b.b.`_2`, json.b.b.`_2`.:Int64, json.b.b.`_2`.:UUID, json.b.b.`_3`, json.b.b.`_3`.:Int64, json.b.b.`_3`.:UUID, json.b.b.`_4`, json.b.b.`_4`.:Int64, json.b.b.`_4`.:UUID, json.b.b.d, json.b.b.d.:Int64, json.b.b.d.:UUID, json.b.b.e, json.b.b.e.:String, json.b.b.e.:UUID, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:UUID, json.d.b, json.d.b.:Int64, json.d.b.:UUID, json.d.c, json.d.c.:Date, json.d.c.:UUID, json.^n, json.^a, json.^a.b, json.^b, json.^d from test order by id format Null; + +select count() from test where json.non.existing.path is Null; +select count() from test where json.non.existing.path.:String is Null; +select json.non.existing.path from test order by id format Null; +select json.non.existing.path.:Int64 from test order by id format Null; +select json.non.existing.path, json.non.existing.path.:Int64 from test order by id format Null; +select json, json.non.existing.path from test order by id format Null; +select json, json.non.existing.path.:Int64 from test order by id format Null; +select json, json.non.existing.path, json.non.existing.path.:Int64 from test format Null; +select json, json.non.existing.path, json.non.existing.path.:Int64 from test order by id format Null; + +select count() from test where json.a.b.c == 0; +select json.a.b.c from test format Null; +select json.a.b.c from test order by id format Null; +select json, json.a.b.c from test format Null; +select json, json.a.b.c from test order by id format Null; + +select count() from test where json.b.b.e is Null; +select count() from test where json.b.b.e.:String is Null; +select json.b.b.e from test format Null; +select json.b.b.e from test order by id format Null; +select json.b.b.e.:String, json.b.b.e.:Date from test format Null; +select json.b.b.e.:String, json.b.b.e.:Date from test order by id format Null; +select json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date from test format Null; +select json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date from test order by id format Null; +select json, json.b.b.e from test format Null; +select json, json.b.b.e from test order by id format Null; +select json, json.b.b.e.:String, json.b.b.e.:Date from test format Null; +select json, json.b.b.e.:String, json.b.b.e.:Date from test order by id format Null; +select json, json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date from test format Null; +select json, json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date from test order by id format Null; + +select count() from test where json.b.b.e is Null and json.a.b.d is Null ; +select count() from test where json.b.b.e.:String is Null and json.a.b.d.:Int64 is Null; +select json.b.b.e, json.a.b.d from test order by id format Null; +select json.b.b.e.:String, json.b.b.e.:Date, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format Null; +select json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format Null; +select json, json.b.b.e, json.a.b.d from test order by id format Null; +select json, json.b.b.e.:String, json.b.b.e.:Date, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format Null; +select json, json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:Date from test format Null; +select json, json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format Null; + +select count() from test where json.b.b.e is Null and json.d.a is Null; +select count() from test where json.b.b.e.:String is Null and empty(json.d.a.:`Array(Nullable(Int64))`); +select json.b.b.e, json.d.a from test order by id format Null; +select json.b.b.e.:String, json.b.b.e.:Date, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date from test order by id format Null; +select json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date from test order by id format Null; +select json, json.b.b.e, json.d.a from test order by id format Null; +select json, json.b.b.e.:String, json.b.b.e.:Date, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date from test order by id format Null; +select json, json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date from test format Null; +select json, json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date from test order by id format Null; + +select count() from test where json.b.b.e is Null and json.d.a is Null and json.d.b is Null; +select count() from test where json.b.b.e.:String is Null and empty(json.d.a.:`Array(Nullable(Int64))`) and json.d.b.:Int64 is Null; +select json.b.b.e, json.d.a, json.d.b from test order by id format Null; +select json.b.b.e.:String, json.b.b.e.:Date, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b.:Int64, json.d.b.:Date from test order by id format Null; +select json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b, json.d.b.:Int64, json.d.b.:Date from test order by id format Null; +select json, json.b.b.e, json.d.a, json.d.b from test order by id format Null; +select json, json.b.b.e.:String, json.b.b.e.:Date, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b.:Int64, json.d.b.:Date from test order by id format Null; +select json, json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b, json.d.b.:Int64, json.d.b.:Date from test format Null; +select json, json.b.b.e, json.b.b.e.:String, json.b.b.e.:Date, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b, json.d.b.:Int64, json.d.b.:Date from test order by id format Null; + +select count() from test where json.d.a is Null and json.d.b is Null; +select count() from test where empty(json.d.a.:`Array(Nullable(Int64))`) and json.d.b.:Int64 is Null; +select json.d.a, json.d.b from test order by id format Null; +select json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b.:Int64, json.d.b.:Date from test order by id format Null; +select json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b, json.d.b.:Int64, json.d.b.:Date from test order by id format Null; +select json, json.d.a, json.d.b from test order by id format Null; +select json, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b.:Int64, json.d.b.:Date from test order by id format Null; +select json, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b, json.d.b.:Int64, json.d.b.:Date from test format Null; +select json, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.d.b, json.d.b.:Int64, json.d.b.:Date from test order by id format Null; + +select count() from test where json.d.a is Null and json.b.b.`_1` is Null; +select count() from test where empty(json.d.a.:`Array(Nullable(Int64))`) and json.b.b.`_1`.:Int64 is Null; +select json.d.a, json.b.b.`_1` from test order by id format Null; +select json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.b.b.`_1`.:Int64, json.b.b.`_1`.:Date from test order by id format Null; +select json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.b.b.`_1`.:Int64, json.b.b, json.b.b.`_1`.:Date from test order by id format Null; +select json, json.d.a, json.b.b.`_1` from test order by id format Null; +select json, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.b.b.`_1`.:Int64, json.b.b.`_1`.:Date from test order by id format Null; +select json, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.b.b.`_1`.:Int64, json.b.b, json.b.b.`_1`.:Date from test format Null; +select json, json.d.a, json.d.a.:`Array(Nullable(Int64))`, json.d.a.:Date, json.b.b.`_1`.:Int64, json.b.b, json.b.b.`_1`.:Date from test order by id format Null; + +select count() from test where empty(json.^a) and json.a.b.c == 0; +select json.^a, json.a.b.c from test order by id format Null; +select json, json.^a, json.a.b.c from test format Null; +select json, json.^a, json.a.b.c from test order by id format Null; + +select count() from test where empty(json.^a) and json.a.b.d is Null; +select json.^a, json.a.b.d from test order by id format Null; +select json.^a, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format Null; +select json.^a, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format Null; +select json, json.^a, json.a.b.d from test order by id format Null; +select json, json.^a, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format Null; +select json, json.^a, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:Date from test format Null; +select json, json.^a, json.a.b.d, json.a.b.d.:Int64, json.a.b.d.:Date from test order by id format Null; + +{% endfor -%} + +drop table test; \ No newline at end of file diff --git a/tests/queries/0_stateless/03208_array_of_json_read_subcolumns_1.reference.j2 b/tests/queries/0_stateless/03208_array_of_json_read_subcolumns_1.reference.j2 new file mode 100644 index 00000000000..0228ae1e7df --- /dev/null +++ b/tests/queries/0_stateless/03208_array_of_json_read_subcolumns_1.reference.j2 @@ -0,0 +1,545 @@ +('a.a1','String') +('a.a2','String') +('a.a3','String') +('a.a4','String') +('a.a5','String') +('a.a6','String') +('a.a7','String') +('a.a8','String') +('a.b','Array(JSON)') +('a.r','Array(JSON(max_dynamic_types=16, max_dynamic_paths=2))') +('b.c.d_0','Int64') +('b.c.d_1','Int64') +('b.c.d_2','Int64') +('b.c.d_3','Int64') +('b.c.d_4','Int64') +('c.d.e','Array(Nullable(Int64))') +('b.c.d_0','Int64') +('b.c.d_1','Int64') +('b.c.d_2','Int64') +('b.c.d_3','Int64') +('b.c.d_4','Int64') +('c.d.e','Array(Nullable(Int64))') +{ + "json": [{"a":{"b":[]}}, {"a":{"b":[]}}, {"a":{"b":[]}}, {"a":{"b":[]}}, {"a":{"b":[]}}, {"a":{"b":[{"b":{"c":{"d_0":"5"}},"c":{"d":{"e":["0"]}}}]}}, {"a":{"b":[{"b":{"c":{"d_0":"6"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"6"}},"c":{"d":{"e":["0","1","2"]}}}]}}, {"a":{"b":[{"b":{"c":{"d_0":"7"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"7"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"7"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}}, {"a":{"b":[{"b":{"c":{"d_0":"8"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"8"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"8"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"8"}},"c":{"d":{"e":["0","1"]}}}]}}, {"a":{"b":[{"b":{"c":{"d_0":"9"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"9"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"9"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"9"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"9"}},"c":{"d":{"e":["0","1","2","3"]}}}]}}, {"a":{"b":[],"r":[{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}]}}, {"a":{"b":[],"r":[{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}]}}, {"a":{"b":[],"r":[{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}}, {"a":{"b":[],"r":[{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}]}}, {"a":{"b":[],"r":[{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}]}}, {"a":{"a1":"15","a2":"15","a3":"15","a4":"15","a5":"15","a6":"15","a7":"15","a8":"15","b":[],"r":[{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}]}}, {"a":{"a1":"16","a2":"16","a3":"16","a4":"16","a5":"16","a6":"16","a7":"16","a8":"16","b":[],"r":[{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}]}}, {"a":{"a1":"17","a2":"17","a3":"17","a4":"17","a5":"17","a6":"17","a7":"17","a8":"17","b":[],"r":[{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}}, {"a":{"a1":"18","a2":"18","a3":"18","a4":"18","a5":"18","a6":"18","a7":"18","a8":"18","b":[],"r":[{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}]}}, {"a":{"a1":"19","a2":"19","a3":"19","a4":"19","a5":"19","a6":"19","a7":"19","a8":"19","b":[],"r":[{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]}}], + "json.a.b": [[], [], [], [], [], [{"b":{"c":{"d_0":"5"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"6"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"6"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"7"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"7"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"7"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"8"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"8"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"8"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"8"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"9"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"9"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"9"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"9"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"9"}},"c":{"d":{"e":["0","1","2","3"]}}}], [], [], [], [], [], [], [], [], [], []], + "json.a.b.c": [[], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,null], [null,null,null,null,null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.c.d.e": [[], [], [], [], [], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_0": [[], [], [], [], [], ["5"], ["6",null], ["7",null,null], ["8",null,null,null], ["9",null,null,null,null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_1": [[], [], [], [], [], [null], [null,"6"], [null,"7",null], [null,"8",null,null], [null,"9",null,null,null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_2": [[], [], [], [], [], [null], [null,null], [null,null,"7"], [null,null,"8",null], [null,null,"9",null,null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_3": [[], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,"8"], [null,null,null,"9",null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_4": [[], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,null], [null,null,null,null,"9"], [], [], [], [], [], [], [], [], [], []], + "json.a.r": [null, null, null, null, null, null, null, null, null, null, [{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}], [{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]], + "json.a.r.:`Array(JSON)`": [[], [], [], [], [], [], [], [], [], [], [{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}], [{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]], + "json.a.r.:`Array(JSON)`.c.d.e": [[], [], [], [], [], [], [], [], [], [], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]]], + "json.a.r.:`Array(JSON)`.b.c.d_0": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_1": [[], [], [], [], [], [], [], [], [], [], [null], [null,"11"], [null,"12",null], [null,"13",null,null], [null,"14",null,null,null], [null], [null,"16"], [null,"17",null], [null,"18",null,null], [null,"19",null,null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_2": [[], [], [], [], [], [], [], [], [], [], [null], [null,null], [null,null,"12"], [null,null,"13",null], [null,null,"14",null,null], [null], [null,null], [null,null,"17"], [null,null,"18",null], [null,null,"19",null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_3": [[], [], [], [], [], [], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,"13"], [null,null,null,"14",null], [null], [null,null], [null,null,null], [null,null,null,"18"], [null,null,null,"19",null]], + "json.a.r.:`Array(JSON)`.b.c.d_4": [[], [], [], [], [], [], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,null], [null,null,null,null,"14"], [null], [null,null], [null,null,null], [null,null,null,null], [null,null,null,null,"19"]], + "json.^`a`": [{"b":[]}, {"b":[]}, {"b":[]}, {"b":[]}, {"b":[]}, {"b":[{"b":{"c":{"d_0":"5"}},"c":{"d":{"e":["0"]}}}]}, {"b":[{"b":{"c":{"d_0":"6"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"6"}},"c":{"d":{"e":["0","1","2"]}}}]}, {"b":[{"b":{"c":{"d_0":"7"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"7"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"7"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}, {"b":[{"b":{"c":{"d_0":"8"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"8"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"8"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"8"}},"c":{"d":{"e":["0","1"]}}}]}, {"b":[{"b":{"c":{"d_0":"9"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"9"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"9"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"9"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"9"}},"c":{"d":{"e":["0","1","2","3"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}]}, {"a1":"15","a2":"15","a3":"15","a4":"15","a5":"15","a6":"15","a7":"15","a8":"15","b":[],"r":[{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}]}, {"a1":"16","a2":"16","a3":"16","a4":"16","a5":"16","a6":"16","a7":"16","a8":"16","b":[],"r":[{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}]}, {"a1":"17","a2":"17","a3":"17","a4":"17","a5":"17","a6":"17","a7":"17","a8":"17","b":[],"r":[{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}, {"a1":"18","a2":"18","a3":"18","a4":"18","a5":"18","a6":"18","a7":"18","a8":"18","b":[],"r":[{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}]}, {"a1":"19","a2":"19","a3":"19","a4":"19","a5":"19","a6":"19","a7":"19","a8":"19","b":[],"r":[{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]}], + "json.a.b.^`b`.c": [[], [], [], [], [], [{"d_0":"5"}], [{"d_0":"6"},{"d_1":"6"}], [{"d_0":"7"},{"d_1":"7"},{"d_2":"7"}], [{"d_0":"8"},{"d_1":"8"},{"d_2":"8"},{"d_3":"8"}], [{"d_0":"9"},{"d_1":"9"},{"d_2":"9"},{"d_3":"9"},{"d_4":"9"}], [], [], [], [], [], [], [], [], [], []], + "json.a.r.:`Array(JSON)`.^`b`.c": [[], [], [], [], [], [], [], [], [], [], [{"d_0":"10"}], [{"d_0":"11"},{"d_1":"11"}], [{"d_0":"12"},{"d_1":"12"},{"d_2":"12"}], [{"d_0":"13"},{"d_1":"13"},{"d_2":"13"},{"d_3":"13"}], [{"d_0":"14"},{"d_1":"14"},{"d_2":"14"},{"d_3":"14"},{"d_4":"14"}], [{"d_0":"15"}], [{"d_0":"16"},{"d_1":"16"}], [{"d_0":"17"},{"d_1":"17"},{"d_2":"17"}], [{"d_0":"18"},{"d_1":"18"},{"d_2":"18"},{"d_3":"18"}], [{"d_0":"19"},{"d_1":"19"},{"d_2":"19"},{"d_3":"19"},{"d_4":"19"}]] +} +{ + "json.a.b": [[], [], [], [], [], [{"b":{"c":{"d_0":"5"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"6"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"6"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"7"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"7"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"7"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"8"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"8"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"8"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"8"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"9"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"9"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"9"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"9"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"9"}},"c":{"d":{"e":["0","1","2","3"]}}}], [], [], [], [], [], [], [], [], [], []], + "json.a.b.c": [[], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,null], [null,null,null,null,null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.c.d.e": [[], [], [], [], [], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_0": [[], [], [], [], [], ["5"], ["6",null], ["7",null,null], ["8",null,null,null], ["9",null,null,null,null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_1": [[], [], [], [], [], [null], [null,"6"], [null,"7",null], [null,"8",null,null], [null,"9",null,null,null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_2": [[], [], [], [], [], [null], [null,null], [null,null,"7"], [null,null,"8",null], [null,null,"9",null,null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_3": [[], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,"8"], [null,null,null,"9",null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_4": [[], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,null], [null,null,null,null,"9"], [], [], [], [], [], [], [], [], [], []], + "json.a.r": [null, null, null, null, null, null, null, null, null, null, [{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}], [{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]], + "json.a.r.:`Array(JSON)`": [[], [], [], [], [], [], [], [], [], [], [{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}], [{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]], + "json.a.r.:`Array(JSON)`.c.d.e": [[], [], [], [], [], [], [], [], [], [], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]]], + "json.a.r.:`Array(JSON)`.b.c.d_0": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_1": [[], [], [], [], [], [], [], [], [], [], [null], [null,"11"], [null,"12",null], [null,"13",null,null], [null,"14",null,null,null], [null], [null,"16"], [null,"17",null], [null,"18",null,null], [null,"19",null,null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_2": [[], [], [], [], [], [], [], [], [], [], [null], [null,null], [null,null,"12"], [null,null,"13",null], [null,null,"14",null,null], [null], [null,null], [null,null,"17"], [null,null,"18",null], [null,null,"19",null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_3": [[], [], [], [], [], [], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,"13"], [null,null,null,"14",null], [null], [null,null], [null,null,null], [null,null,null,"18"], [null,null,null,"19",null]], + "json.a.r.:`Array(JSON)`.b.c.d_4": [[], [], [], [], [], [], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,null], [null,null,null,null,"14"], [null], [null,null], [null,null,null], [null,null,null,null], [null,null,null,null,"19"]], + "json.^`a`": [{"b":[]}, {"b":[]}, {"b":[]}, {"b":[]}, {"b":[]}, {"b":[{"b":{"c":{"d_0":"5"}},"c":{"d":{"e":["0"]}}}]}, {"b":[{"b":{"c":{"d_0":"6"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"6"}},"c":{"d":{"e":["0","1","2"]}}}]}, {"b":[{"b":{"c":{"d_0":"7"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"7"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"7"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}, {"b":[{"b":{"c":{"d_0":"8"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"8"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"8"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"8"}},"c":{"d":{"e":["0","1"]}}}]}, {"b":[{"b":{"c":{"d_0":"9"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"9"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"9"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"9"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"9"}},"c":{"d":{"e":["0","1","2","3"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}]}, {"a1":"15","a2":"15","a3":"15","a4":"15","a5":"15","a6":"15","a7":"15","a8":"15","b":[],"r":[{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}]}, {"a1":"16","a2":"16","a3":"16","a4":"16","a5":"16","a6":"16","a7":"16","a8":"16","b":[],"r":[{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}]}, {"a1":"17","a2":"17","a3":"17","a4":"17","a5":"17","a6":"17","a7":"17","a8":"17","b":[],"r":[{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}, {"a1":"18","a2":"18","a3":"18","a4":"18","a5":"18","a6":"18","a7":"18","a8":"18","b":[],"r":[{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}]}, {"a1":"19","a2":"19","a3":"19","a4":"19","a5":"19","a6":"19","a7":"19","a8":"19","b":[],"r":[{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]}], + "json.a.b.^`b`.c": [[], [], [], [], [], [{"d_0":"5"}], [{"d_0":"6"},{"d_1":"6"}], [{"d_0":"7"},{"d_1":"7"},{"d_2":"7"}], [{"d_0":"8"},{"d_1":"8"},{"d_2":"8"},{"d_3":"8"}], [{"d_0":"9"},{"d_1":"9"},{"d_2":"9"},{"d_3":"9"},{"d_4":"9"}], [], [], [], [], [], [], [], [], [], []], + "json.a.r.:`Array(JSON)`.^`b`.c": [[], [], [], [], [], [], [], [], [], [], [{"d_0":"10"}], [{"d_0":"11"},{"d_1":"11"}], [{"d_0":"12"},{"d_1":"12"},{"d_2":"12"}], [{"d_0":"13"},{"d_1":"13"},{"d_2":"13"},{"d_3":"13"}], [{"d_0":"14"},{"d_1":"14"},{"d_2":"14"},{"d_3":"14"},{"d_4":"14"}], [{"d_0":"15"}], [{"d_0":"16"},{"d_1":"16"}], [{"d_0":"17"},{"d_1":"17"},{"d_2":"17"}], [{"d_0":"18"},{"d_1":"18"},{"d_2":"18"},{"d_3":"18"}], [{"d_0":"19"},{"d_1":"19"},{"d_2":"19"},{"d_3":"19"},{"d_4":"19"}]] +} +{ + "json.a.r.:`Array(JSON)`.c.d.e": [[], [], [], [], [], [], [], [], [], [], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]]], + "json.a.r.:`Array(JSON)`.b.c.d_0": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_1": [[], [], [], [], [], [], [], [], [], [], [null], [null,"11"], [null,"12",null], [null,"13",null,null], [null,"14",null,null,null], [null], [null,"16"], [null,"17",null], [null,"18",null,null], [null,"19",null,null,null]] +} +{ + "json.a.r.:`Array(JSON)`.c.d.e.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]]], + "json.a.r.:`Array(JSON)`.b.c.d_0.:`Int64`": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_1.:`Int64`": [[], [], [], [], [], [], [], [], [], [], [null], [null,"11"], [null,"12",null], [null,"13",null,null], [null,"14",null,null,null], [null], [null,"16"], [null,"17",null], [null,"18",null,null], [null,"19",null,null,null]] +} +{ + "json.a.r": [null, null, null, null, null, null, null, null, null, null, [{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}], [{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]], + "json.a.r.:`Array(JSON)`.c.d.e": [[], [], [], [], [], [], [], [], [], [], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]]], + "json.a.r.:`Array(JSON)`.b.c.d_0": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_1": [[], [], [], [], [], [], [], [], [], [], [null], [null,"11"], [null,"12",null], [null,"13",null,null], [null,"14",null,null,null], [null], [null,"16"], [null,"17",null], [null,"18",null,null], [null,"19",null,null,null]] +} +{ + "json.a.r": [null, null, null, null, null, null, null, null, null, null, [{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}], [{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]], + "json.a.r.:`Array(JSON)`.c.d.e.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]]], + "json.a.r.:`Array(JSON)`.b.c.d_0.:`Int64`": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_1.:`Int64`": [[], [], [], [], [], [], [], [], [], [], [null], [null,"11"], [null,"12",null], [null,"13",null,null], [null,"14",null,null,null], [null], [null,"16"], [null,"17",null], [null,"18",null,null], [null,"19",null,null,null]] +} +{ + "json.a.r.:`Array(JSON)`.^`b`": [[], [], [], [], [], [], [], [], [], [], [{"c":{"d_0":"10"}}], [{"c":{"d_0":"11"}},{"c":{"d_1":"11"}}], [{"c":{"d_0":"12"}},{"c":{"d_1":"12"}},{"c":{"d_2":"12"}}], [{"c":{"d_0":"13"}},{"c":{"d_1":"13"}},{"c":{"d_2":"13"}},{"c":{"d_3":"13"}}], [{"c":{"d_0":"14"}},{"c":{"d_1":"14"}},{"c":{"d_2":"14"}},{"c":{"d_3":"14"}},{"c":{"d_4":"14"}}], [{"c":{"d_0":"15"}}], [{"c":{"d_0":"16"}},{"c":{"d_1":"16"}}], [{"c":{"d_0":"17"}},{"c":{"d_1":"17"}},{"c":{"d_2":"17"}}], [{"c":{"d_0":"18"}},{"c":{"d_1":"18"}},{"c":{"d_2":"18"}},{"c":{"d_3":"18"}}], [{"c":{"d_0":"19"}},{"c":{"d_1":"19"}},{"c":{"d_2":"19"}},{"c":{"d_3":"19"}},{"c":{"d_4":"19"}}]], + "json.a.r.:`Array(JSON)`.^`b`.c": [[], [], [], [], [], [], [], [], [], [], [{"d_0":"10"}], [{"d_0":"11"},{"d_1":"11"}], [{"d_0":"12"},{"d_1":"12"},{"d_2":"12"}], [{"d_0":"13"},{"d_1":"13"},{"d_2":"13"},{"d_3":"13"}], [{"d_0":"14"},{"d_1":"14"},{"d_2":"14"},{"d_3":"14"},{"d_4":"14"}], [{"d_0":"15"}], [{"d_0":"16"},{"d_1":"16"}], [{"d_0":"17"},{"d_1":"17"},{"d_2":"17"}], [{"d_0":"18"},{"d_1":"18"},{"d_2":"18"},{"d_3":"18"}], [{"d_0":"19"},{"d_1":"19"},{"d_2":"19"},{"d_3":"19"},{"d_4":"19"}]], + "json.a.r.:`Array(JSON)`.b.c.d_0": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]] +} +{ + "json.a.r.:`Array(JSON)`.^`b`": [[], [], [], [], [], [], [], [], [], [], [{"c":{"d_0":"10"}}], [{"c":{"d_0":"11"}},{"c":{"d_1":"11"}}], [{"c":{"d_0":"12"}},{"c":{"d_1":"12"}},{"c":{"d_2":"12"}}], [{"c":{"d_0":"13"}},{"c":{"d_1":"13"}},{"c":{"d_2":"13"}},{"c":{"d_3":"13"}}], [{"c":{"d_0":"14"}},{"c":{"d_1":"14"}},{"c":{"d_2":"14"}},{"c":{"d_3":"14"}},{"c":{"d_4":"14"}}], [{"c":{"d_0":"15"}}], [{"c":{"d_0":"16"}},{"c":{"d_1":"16"}}], [{"c":{"d_0":"17"}},{"c":{"d_1":"17"}},{"c":{"d_2":"17"}}], [{"c":{"d_0":"18"}},{"c":{"d_1":"18"}},{"c":{"d_2":"18"}},{"c":{"d_3":"18"}}], [{"c":{"d_0":"19"}},{"c":{"d_1":"19"}},{"c":{"d_2":"19"}},{"c":{"d_3":"19"}},{"c":{"d_4":"19"}}]], + "json.a.r.:`Array(JSON)`.^`b`.c": [[], [], [], [], [], [], [], [], [], [], [{"d_0":"10"}], [{"d_0":"11"},{"d_1":"11"}], [{"d_0":"12"},{"d_1":"12"},{"d_2":"12"}], [{"d_0":"13"},{"d_1":"13"},{"d_2":"13"},{"d_3":"13"}], [{"d_0":"14"},{"d_1":"14"},{"d_2":"14"},{"d_3":"14"},{"d_4":"14"}], [{"d_0":"15"}], [{"d_0":"16"},{"d_1":"16"}], [{"d_0":"17"},{"d_1":"17"},{"d_2":"17"}], [{"d_0":"18"},{"d_1":"18"},{"d_2":"18"},{"d_3":"18"}], [{"d_0":"19"},{"d_1":"19"},{"d_2":"19"},{"d_3":"19"},{"d_4":"19"}]], + "json.a.r.:`Array(JSON)`.b.c.d_0.:`Int64`": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]] +} +{ + "json.a.r": [null, null, null, null, null, null, null, null, null, null, [{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}], [{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]], + "json.a.r.:`Array(JSON)`.^`b`": [[], [], [], [], [], [], [], [], [], [], [{"c":{"d_0":"10"}}], [{"c":{"d_0":"11"}},{"c":{"d_1":"11"}}], [{"c":{"d_0":"12"}},{"c":{"d_1":"12"}},{"c":{"d_2":"12"}}], [{"c":{"d_0":"13"}},{"c":{"d_1":"13"}},{"c":{"d_2":"13"}},{"c":{"d_3":"13"}}], [{"c":{"d_0":"14"}},{"c":{"d_1":"14"}},{"c":{"d_2":"14"}},{"c":{"d_3":"14"}},{"c":{"d_4":"14"}}], [{"c":{"d_0":"15"}}], [{"c":{"d_0":"16"}},{"c":{"d_1":"16"}}], [{"c":{"d_0":"17"}},{"c":{"d_1":"17"}},{"c":{"d_2":"17"}}], [{"c":{"d_0":"18"}},{"c":{"d_1":"18"}},{"c":{"d_2":"18"}},{"c":{"d_3":"18"}}], [{"c":{"d_0":"19"}},{"c":{"d_1":"19"}},{"c":{"d_2":"19"}},{"c":{"d_3":"19"}},{"c":{"d_4":"19"}}]], + "json.a.r.:`Array(JSON)`.^`b`.c": [[], [], [], [], [], [], [], [], [], [], [{"d_0":"10"}], [{"d_0":"11"},{"d_1":"11"}], [{"d_0":"12"},{"d_1":"12"},{"d_2":"12"}], [{"d_0":"13"},{"d_1":"13"},{"d_2":"13"},{"d_3":"13"}], [{"d_0":"14"},{"d_1":"14"},{"d_2":"14"},{"d_3":"14"},{"d_4":"14"}], [{"d_0":"15"}], [{"d_0":"16"},{"d_1":"16"}], [{"d_0":"17"},{"d_1":"17"},{"d_2":"17"}], [{"d_0":"18"},{"d_1":"18"},{"d_2":"18"},{"d_3":"18"}], [{"d_0":"19"},{"d_1":"19"},{"d_2":"19"},{"d_3":"19"},{"d_4":"19"}]], + "json.a.r.:`Array(JSON)`.b.c.d_0": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]] +} +{ + "json.a.r": [null, null, null, null, null, null, null, null, null, null, [{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}], [{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]], + "json.a.r.:`Array(JSON)`.^`b`": [[], [], [], [], [], [], [], [], [], [], [{"c":{"d_0":"10"}}], [{"c":{"d_0":"11"}},{"c":{"d_1":"11"}}], [{"c":{"d_0":"12"}},{"c":{"d_1":"12"}},{"c":{"d_2":"12"}}], [{"c":{"d_0":"13"}},{"c":{"d_1":"13"}},{"c":{"d_2":"13"}},{"c":{"d_3":"13"}}], [{"c":{"d_0":"14"}},{"c":{"d_1":"14"}},{"c":{"d_2":"14"}},{"c":{"d_3":"14"}},{"c":{"d_4":"14"}}], [{"c":{"d_0":"15"}}], [{"c":{"d_0":"16"}},{"c":{"d_1":"16"}}], [{"c":{"d_0":"17"}},{"c":{"d_1":"17"}},{"c":{"d_2":"17"}}], [{"c":{"d_0":"18"}},{"c":{"d_1":"18"}},{"c":{"d_2":"18"}},{"c":{"d_3":"18"}}], [{"c":{"d_0":"19"}},{"c":{"d_1":"19"}},{"c":{"d_2":"19"}},{"c":{"d_3":"19"}},{"c":{"d_4":"19"}}]], + "json.a.r.:`Array(JSON)`.^`b`.c": [[], [], [], [], [], [], [], [], [], [], [{"d_0":"10"}], [{"d_0":"11"},{"d_1":"11"}], [{"d_0":"12"},{"d_1":"12"},{"d_2":"12"}], [{"d_0":"13"},{"d_1":"13"},{"d_2":"13"},{"d_3":"13"}], [{"d_0":"14"},{"d_1":"14"},{"d_2":"14"},{"d_3":"14"},{"d_4":"14"}], [{"d_0":"15"}], [{"d_0":"16"},{"d_1":"16"}], [{"d_0":"17"},{"d_1":"17"},{"d_2":"17"}], [{"d_0":"18"},{"d_1":"18"},{"d_2":"18"},{"d_3":"18"}], [{"d_0":"19"},{"d_1":"19"},{"d_2":"19"},{"d_3":"19"},{"d_4":"19"}]], + "json.a.r.:`Array(JSON)`.b.c.d_0.:`Int64`": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]] +} +('a.a1','String') +('a.a2','String') +('a.a3','String') +('a.a4','String') +('a.a5','String') +('a.a6','String') +('a.a7','String') +('a.a8','String') +('a.b','Array(JSON)') +('a.r','Array(JSON(max_dynamic_types=16, max_dynamic_paths=2))') +('b.c.d_0','Int64') +('b.c.d_1','Int64') +('b.c.d_2','Int64') +('b.c.d_3','Int64') +('b.c.d_4','Int64') +('c.d.e','Array(Nullable(Int64))') +('b.c.d_0','Int64') +('b.c.d_1','Int64') +('b.c.d_2','Int64') +('b.c.d_3','Int64') +('b.c.d_4','Int64') +('c.d.e','Array(Nullable(Int64))') +{ + "json": [{"a":{"b":[]}}, {"a":{"b":[]}}, {"a":{"b":[]}}, {"a":{"b":[]}}, {"a":{"b":[]}}, {"a":{"b":[{"b":{"c":{"d_0":"5"}},"c":{"d":{"e":["0"]}}}]}}, {"a":{"b":[{"b":{"c":{"d_0":"6"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"6"}},"c":{"d":{"e":["0","1","2"]}}}]}}, {"a":{"b":[{"b":{"c":{"d_0":"7"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"7"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"7"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}}, {"a":{"b":[{"b":{"c":{"d_0":"8"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"8"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"8"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"8"}},"c":{"d":{"e":["0","1"]}}}]}}, {"a":{"b":[{"b":{"c":{"d_0":"9"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"9"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"9"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"9"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"9"}},"c":{"d":{"e":["0","1","2","3"]}}}]}}, {"a":{"b":[],"r":[{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}]}}, {"a":{"b":[],"r":[{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}]}}, {"a":{"b":[],"r":[{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}}, {"a":{"b":[],"r":[{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}]}}, {"a":{"b":[],"r":[{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}]}}, {"a":{"a1":"15","a2":"15","a3":"15","a4":"15","a5":"15","a6":"15","a7":"15","a8":"15","b":[],"r":[{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}]}}, {"a":{"a1":"16","a2":"16","a3":"16","a4":"16","a5":"16","a6":"16","a7":"16","a8":"16","b":[],"r":[{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}]}}, {"a":{"a1":"17","a2":"17","a3":"17","a4":"17","a5":"17","a6":"17","a7":"17","a8":"17","b":[],"r":[{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}}, {"a":{"a1":"18","a2":"18","a3":"18","a4":"18","a5":"18","a6":"18","a7":"18","a8":"18","b":[],"r":[{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}]}}, {"a":{"a1":"19","a2":"19","a3":"19","a4":"19","a5":"19","a6":"19","a7":"19","a8":"19","b":[],"r":[{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]}}], + "json.a.b": [[], [], [], [], [], [{"b":{"c":{"d_0":"5"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"6"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"6"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"7"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"7"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"7"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"8"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"8"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"8"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"8"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"9"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"9"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"9"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"9"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"9"}},"c":{"d":{"e":["0","1","2","3"]}}}], [], [], [], [], [], [], [], [], [], []], + "json.a.b.c": [[], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,null], [null,null,null,null,null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.c.d.e": [[], [], [], [], [], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_0": [[], [], [], [], [], ["5"], ["6",null], ["7",null,null], ["8",null,null,null], ["9",null,null,null,null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_1": [[], [], [], [], [], [null], [null,"6"], [null,"7",null], [null,"8",null,null], [null,"9",null,null,null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_2": [[], [], [], [], [], [null], [null,null], [null,null,"7"], [null,null,"8",null], [null,null,"9",null,null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_3": [[], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,"8"], [null,null,null,"9",null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_4": [[], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,null], [null,null,null,null,"9"], [], [], [], [], [], [], [], [], [], []], + "json.a.r": [null, null, null, null, null, null, null, null, null, null, [{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}], [{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]], + "json.a.r.:`Array(JSON)`": [[], [], [], [], [], [], [], [], [], [], [{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}], [{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]], + "json.a.r.:`Array(JSON)`.c.d.e": [[], [], [], [], [], [], [], [], [], [], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]]], + "json.a.r.:`Array(JSON)`.b.c.d_0": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_1": [[], [], [], [], [], [], [], [], [], [], [null], [null,"11"], [null,"12",null], [null,"13",null,null], [null,"14",null,null,null], [null], [null,"16"], [null,"17",null], [null,"18",null,null], [null,"19",null,null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_2": [[], [], [], [], [], [], [], [], [], [], [null], [null,null], [null,null,"12"], [null,null,"13",null], [null,null,"14",null,null], [null], [null,null], [null,null,"17"], [null,null,"18",null], [null,null,"19",null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_3": [[], [], [], [], [], [], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,"13"], [null,null,null,"14",null], [null], [null,null], [null,null,null], [null,null,null,"18"], [null,null,null,"19",null]], + "json.a.r.:`Array(JSON)`.b.c.d_4": [[], [], [], [], [], [], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,null], [null,null,null,null,"14"], [null], [null,null], [null,null,null], [null,null,null,null], [null,null,null,null,"19"]], + "json.^`a`": [{"b":[]}, {"b":[]}, {"b":[]}, {"b":[]}, {"b":[]}, {"b":[{"b":{"c":{"d_0":"5"}},"c":{"d":{"e":["0"]}}}]}, {"b":[{"b":{"c":{"d_0":"6"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"6"}},"c":{"d":{"e":["0","1","2"]}}}]}, {"b":[{"b":{"c":{"d_0":"7"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"7"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"7"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}, {"b":[{"b":{"c":{"d_0":"8"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"8"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"8"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"8"}},"c":{"d":{"e":["0","1"]}}}]}, {"b":[{"b":{"c":{"d_0":"9"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"9"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"9"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"9"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"9"}},"c":{"d":{"e":["0","1","2","3"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}]}, {"a1":"15","a2":"15","a3":"15","a4":"15","a5":"15","a6":"15","a7":"15","a8":"15","b":[],"r":[{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}]}, {"a1":"16","a2":"16","a3":"16","a4":"16","a5":"16","a6":"16","a7":"16","a8":"16","b":[],"r":[{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}]}, {"a1":"17","a2":"17","a3":"17","a4":"17","a5":"17","a6":"17","a7":"17","a8":"17","b":[],"r":[{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}, {"a1":"18","a2":"18","a3":"18","a4":"18","a5":"18","a6":"18","a7":"18","a8":"18","b":[],"r":[{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}]}, {"a1":"19","a2":"19","a3":"19","a4":"19","a5":"19","a6":"19","a7":"19","a8":"19","b":[],"r":[{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]}], + "json.a.b.^`b`.c": [[], [], [], [], [], [{"d_0":"5"}], [{"d_0":"6"},{"d_1":"6"}], [{"d_0":"7"},{"d_1":"7"},{"d_2":"7"}], [{"d_0":"8"},{"d_1":"8"},{"d_2":"8"},{"d_3":"8"}], [{"d_0":"9"},{"d_1":"9"},{"d_2":"9"},{"d_3":"9"},{"d_4":"9"}], [], [], [], [], [], [], [], [], [], []], + "json.a.r.:`Array(JSON)`.^`b`.c": [[], [], [], [], [], [], [], [], [], [], [{"d_0":"10"}], [{"d_0":"11"},{"d_1":"11"}], [{"d_0":"12"},{"d_1":"12"},{"d_2":"12"}], [{"d_0":"13"},{"d_1":"13"},{"d_2":"13"},{"d_3":"13"}], [{"d_0":"14"},{"d_1":"14"},{"d_2":"14"},{"d_3":"14"},{"d_4":"14"}], [{"d_0":"15"}], [{"d_0":"16"},{"d_1":"16"}], [{"d_0":"17"},{"d_1":"17"},{"d_2":"17"}], [{"d_0":"18"},{"d_1":"18"},{"d_2":"18"},{"d_3":"18"}], [{"d_0":"19"},{"d_1":"19"},{"d_2":"19"},{"d_3":"19"},{"d_4":"19"}]] +} +{ + "json.a.b": [[], [], [], [], [], [{"b":{"c":{"d_0":"5"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"6"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"6"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"7"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"7"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"7"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"8"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"8"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"8"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"8"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"9"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"9"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"9"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"9"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"9"}},"c":{"d":{"e":["0","1","2","3"]}}}], [], [], [], [], [], [], [], [], [], []], + "json.a.b.c": [[], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,null], [null,null,null,null,null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.c.d.e": [[], [], [], [], [], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_0": [[], [], [], [], [], ["5"], ["6",null], ["7",null,null], ["8",null,null,null], ["9",null,null,null,null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_1": [[], [], [], [], [], [null], [null,"6"], [null,"7",null], [null,"8",null,null], [null,"9",null,null,null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_2": [[], [], [], [], [], [null], [null,null], [null,null,"7"], [null,null,"8",null], [null,null,"9",null,null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_3": [[], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,"8"], [null,null,null,"9",null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_4": [[], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,null], [null,null,null,null,"9"], [], [], [], [], [], [], [], [], [], []], + "json.a.r": [null, null, null, null, null, null, null, null, null, null, [{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}], [{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]], + "json.a.r.:`Array(JSON)`": [[], [], [], [], [], [], [], [], [], [], [{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}], [{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]], + "json.a.r.:`Array(JSON)`.c.d.e": [[], [], [], [], [], [], [], [], [], [], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]]], + "json.a.r.:`Array(JSON)`.b.c.d_0": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_1": [[], [], [], [], [], [], [], [], [], [], [null], [null,"11"], [null,"12",null], [null,"13",null,null], [null,"14",null,null,null], [null], [null,"16"], [null,"17",null], [null,"18",null,null], [null,"19",null,null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_2": [[], [], [], [], [], [], [], [], [], [], [null], [null,null], [null,null,"12"], [null,null,"13",null], [null,null,"14",null,null], [null], [null,null], [null,null,"17"], [null,null,"18",null], [null,null,"19",null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_3": [[], [], [], [], [], [], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,"13"], [null,null,null,"14",null], [null], [null,null], [null,null,null], [null,null,null,"18"], [null,null,null,"19",null]], + "json.a.r.:`Array(JSON)`.b.c.d_4": [[], [], [], [], [], [], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,null], [null,null,null,null,"14"], [null], [null,null], [null,null,null], [null,null,null,null], [null,null,null,null,"19"]], + "json.^`a`": [{"b":[]}, {"b":[]}, {"b":[]}, {"b":[]}, {"b":[]}, {"b":[{"b":{"c":{"d_0":"5"}},"c":{"d":{"e":["0"]}}}]}, {"b":[{"b":{"c":{"d_0":"6"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"6"}},"c":{"d":{"e":["0","1","2"]}}}]}, {"b":[{"b":{"c":{"d_0":"7"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"7"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"7"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}, {"b":[{"b":{"c":{"d_0":"8"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"8"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"8"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"8"}},"c":{"d":{"e":["0","1"]}}}]}, {"b":[{"b":{"c":{"d_0":"9"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"9"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"9"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"9"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"9"}},"c":{"d":{"e":["0","1","2","3"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}]}, {"a1":"15","a2":"15","a3":"15","a4":"15","a5":"15","a6":"15","a7":"15","a8":"15","b":[],"r":[{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}]}, {"a1":"16","a2":"16","a3":"16","a4":"16","a5":"16","a6":"16","a7":"16","a8":"16","b":[],"r":[{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}]}, {"a1":"17","a2":"17","a3":"17","a4":"17","a5":"17","a6":"17","a7":"17","a8":"17","b":[],"r":[{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}, {"a1":"18","a2":"18","a3":"18","a4":"18","a5":"18","a6":"18","a7":"18","a8":"18","b":[],"r":[{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}]}, {"a1":"19","a2":"19","a3":"19","a4":"19","a5":"19","a6":"19","a7":"19","a8":"19","b":[],"r":[{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]}], + "json.a.b.^`b`.c": [[], [], [], [], [], [{"d_0":"5"}], [{"d_0":"6"},{"d_1":"6"}], [{"d_0":"7"},{"d_1":"7"},{"d_2":"7"}], [{"d_0":"8"},{"d_1":"8"},{"d_2":"8"},{"d_3":"8"}], [{"d_0":"9"},{"d_1":"9"},{"d_2":"9"},{"d_3":"9"},{"d_4":"9"}], [], [], [], [], [], [], [], [], [], []], + "json.a.r.:`Array(JSON)`.^`b`.c": [[], [], [], [], [], [], [], [], [], [], [{"d_0":"10"}], [{"d_0":"11"},{"d_1":"11"}], [{"d_0":"12"},{"d_1":"12"},{"d_2":"12"}], [{"d_0":"13"},{"d_1":"13"},{"d_2":"13"},{"d_3":"13"}], [{"d_0":"14"},{"d_1":"14"},{"d_2":"14"},{"d_3":"14"},{"d_4":"14"}], [{"d_0":"15"}], [{"d_0":"16"},{"d_1":"16"}], [{"d_0":"17"},{"d_1":"17"},{"d_2":"17"}], [{"d_0":"18"},{"d_1":"18"},{"d_2":"18"},{"d_3":"18"}], [{"d_0":"19"},{"d_1":"19"},{"d_2":"19"},{"d_3":"19"},{"d_4":"19"}]] +} +{ + "json.a.r.:`Array(JSON)`.c.d.e": [[], [], [], [], [], [], [], [], [], [], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]]], + "json.a.r.:`Array(JSON)`.b.c.d_0": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_1": [[], [], [], [], [], [], [], [], [], [], [null], [null,"11"], [null,"12",null], [null,"13",null,null], [null,"14",null,null,null], [null], [null,"16"], [null,"17",null], [null,"18",null,null], [null,"19",null,null,null]] +} +{ + "json.a.r.:`Array(JSON)`.c.d.e.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]]], + "json.a.r.:`Array(JSON)`.b.c.d_0.:`Int64`": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_1.:`Int64`": [[], [], [], [], [], [], [], [], [], [], [null], [null,"11"], [null,"12",null], [null,"13",null,null], [null,"14",null,null,null], [null], [null,"16"], [null,"17",null], [null,"18",null,null], [null,"19",null,null,null]] +} +{ + "json.a.r": [null, null, null, null, null, null, null, null, null, null, [{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}], [{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]], + "json.a.r.:`Array(JSON)`.c.d.e": [[], [], [], [], [], [], [], [], [], [], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]]], + "json.a.r.:`Array(JSON)`.b.c.d_0": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_1": [[], [], [], [], [], [], [], [], [], [], [null], [null,"11"], [null,"12",null], [null,"13",null,null], [null,"14",null,null,null], [null], [null,"16"], [null,"17",null], [null,"18",null,null], [null,"19",null,null,null]] +} +{ + "json.a.r": [null, null, null, null, null, null, null, null, null, null, [{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}], [{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]], + "json.a.r.:`Array(JSON)`.c.d.e.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]]], + "json.a.r.:`Array(JSON)`.b.c.d_0.:`Int64`": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_1.:`Int64`": [[], [], [], [], [], [], [], [], [], [], [null], [null,"11"], [null,"12",null], [null,"13",null,null], [null,"14",null,null,null], [null], [null,"16"], [null,"17",null], [null,"18",null,null], [null,"19",null,null,null]] +} +{ + "json.a.r.:`Array(JSON)`.^`b`": [[], [], [], [], [], [], [], [], [], [], [{"c":{"d_0":"10"}}], [{"c":{"d_0":"11"}},{"c":{"d_1":"11"}}], [{"c":{"d_0":"12"}},{"c":{"d_1":"12"}},{"c":{"d_2":"12"}}], [{"c":{"d_0":"13"}},{"c":{"d_1":"13"}},{"c":{"d_2":"13"}},{"c":{"d_3":"13"}}], [{"c":{"d_0":"14"}},{"c":{"d_1":"14"}},{"c":{"d_2":"14"}},{"c":{"d_3":"14"}},{"c":{"d_4":"14"}}], [{"c":{"d_0":"15"}}], [{"c":{"d_0":"16"}},{"c":{"d_1":"16"}}], [{"c":{"d_0":"17"}},{"c":{"d_1":"17"}},{"c":{"d_2":"17"}}], [{"c":{"d_0":"18"}},{"c":{"d_1":"18"}},{"c":{"d_2":"18"}},{"c":{"d_3":"18"}}], [{"c":{"d_0":"19"}},{"c":{"d_1":"19"}},{"c":{"d_2":"19"}},{"c":{"d_3":"19"}},{"c":{"d_4":"19"}}]], + "json.a.r.:`Array(JSON)`.^`b`.c": [[], [], [], [], [], [], [], [], [], [], [{"d_0":"10"}], [{"d_0":"11"},{"d_1":"11"}], [{"d_0":"12"},{"d_1":"12"},{"d_2":"12"}], [{"d_0":"13"},{"d_1":"13"},{"d_2":"13"},{"d_3":"13"}], [{"d_0":"14"},{"d_1":"14"},{"d_2":"14"},{"d_3":"14"},{"d_4":"14"}], [{"d_0":"15"}], [{"d_0":"16"},{"d_1":"16"}], [{"d_0":"17"},{"d_1":"17"},{"d_2":"17"}], [{"d_0":"18"},{"d_1":"18"},{"d_2":"18"},{"d_3":"18"}], [{"d_0":"19"},{"d_1":"19"},{"d_2":"19"},{"d_3":"19"},{"d_4":"19"}]], + "json.a.r.:`Array(JSON)`.b.c.d_0": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]] +} +{ + "json.a.r.:`Array(JSON)`.^`b`": [[], [], [], [], [], [], [], [], [], [], [{"c":{"d_0":"10"}}], [{"c":{"d_0":"11"}},{"c":{"d_1":"11"}}], [{"c":{"d_0":"12"}},{"c":{"d_1":"12"}},{"c":{"d_2":"12"}}], [{"c":{"d_0":"13"}},{"c":{"d_1":"13"}},{"c":{"d_2":"13"}},{"c":{"d_3":"13"}}], [{"c":{"d_0":"14"}},{"c":{"d_1":"14"}},{"c":{"d_2":"14"}},{"c":{"d_3":"14"}},{"c":{"d_4":"14"}}], [{"c":{"d_0":"15"}}], [{"c":{"d_0":"16"}},{"c":{"d_1":"16"}}], [{"c":{"d_0":"17"}},{"c":{"d_1":"17"}},{"c":{"d_2":"17"}}], [{"c":{"d_0":"18"}},{"c":{"d_1":"18"}},{"c":{"d_2":"18"}},{"c":{"d_3":"18"}}], [{"c":{"d_0":"19"}},{"c":{"d_1":"19"}},{"c":{"d_2":"19"}},{"c":{"d_3":"19"}},{"c":{"d_4":"19"}}]], + "json.a.r.:`Array(JSON)`.^`b`.c": [[], [], [], [], [], [], [], [], [], [], [{"d_0":"10"}], [{"d_0":"11"},{"d_1":"11"}], [{"d_0":"12"},{"d_1":"12"},{"d_2":"12"}], [{"d_0":"13"},{"d_1":"13"},{"d_2":"13"},{"d_3":"13"}], [{"d_0":"14"},{"d_1":"14"},{"d_2":"14"},{"d_3":"14"},{"d_4":"14"}], [{"d_0":"15"}], [{"d_0":"16"},{"d_1":"16"}], [{"d_0":"17"},{"d_1":"17"},{"d_2":"17"}], [{"d_0":"18"},{"d_1":"18"},{"d_2":"18"},{"d_3":"18"}], [{"d_0":"19"},{"d_1":"19"},{"d_2":"19"},{"d_3":"19"},{"d_4":"19"}]], + "json.a.r.:`Array(JSON)`.b.c.d_0.:`Int64`": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]] +} +{ + "json.a.r": [null, null, null, null, null, null, null, null, null, null, [{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}], [{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]], + "json.a.r.:`Array(JSON)`.^`b`": [[], [], [], [], [], [], [], [], [], [], [{"c":{"d_0":"10"}}], [{"c":{"d_0":"11"}},{"c":{"d_1":"11"}}], [{"c":{"d_0":"12"}},{"c":{"d_1":"12"}},{"c":{"d_2":"12"}}], [{"c":{"d_0":"13"}},{"c":{"d_1":"13"}},{"c":{"d_2":"13"}},{"c":{"d_3":"13"}}], [{"c":{"d_0":"14"}},{"c":{"d_1":"14"}},{"c":{"d_2":"14"}},{"c":{"d_3":"14"}},{"c":{"d_4":"14"}}], [{"c":{"d_0":"15"}}], [{"c":{"d_0":"16"}},{"c":{"d_1":"16"}}], [{"c":{"d_0":"17"}},{"c":{"d_1":"17"}},{"c":{"d_2":"17"}}], [{"c":{"d_0":"18"}},{"c":{"d_1":"18"}},{"c":{"d_2":"18"}},{"c":{"d_3":"18"}}], [{"c":{"d_0":"19"}},{"c":{"d_1":"19"}},{"c":{"d_2":"19"}},{"c":{"d_3":"19"}},{"c":{"d_4":"19"}}]], + "json.a.r.:`Array(JSON)`.^`b`.c": [[], [], [], [], [], [], [], [], [], [], [{"d_0":"10"}], [{"d_0":"11"},{"d_1":"11"}], [{"d_0":"12"},{"d_1":"12"},{"d_2":"12"}], [{"d_0":"13"},{"d_1":"13"},{"d_2":"13"},{"d_3":"13"}], [{"d_0":"14"},{"d_1":"14"},{"d_2":"14"},{"d_3":"14"},{"d_4":"14"}], [{"d_0":"15"}], [{"d_0":"16"},{"d_1":"16"}], [{"d_0":"17"},{"d_1":"17"},{"d_2":"17"}], [{"d_0":"18"},{"d_1":"18"},{"d_2":"18"},{"d_3":"18"}], [{"d_0":"19"},{"d_1":"19"},{"d_2":"19"},{"d_3":"19"},{"d_4":"19"}]], + "json.a.r.:`Array(JSON)`.b.c.d_0": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]] +} +{ + "json.a.r": [null, null, null, null, null, null, null, null, null, null, [{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}], [{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]], + "json.a.r.:`Array(JSON)`.^`b`": [[], [], [], [], [], [], [], [], [], [], [{"c":{"d_0":"10"}}], [{"c":{"d_0":"11"}},{"c":{"d_1":"11"}}], [{"c":{"d_0":"12"}},{"c":{"d_1":"12"}},{"c":{"d_2":"12"}}], [{"c":{"d_0":"13"}},{"c":{"d_1":"13"}},{"c":{"d_2":"13"}},{"c":{"d_3":"13"}}], [{"c":{"d_0":"14"}},{"c":{"d_1":"14"}},{"c":{"d_2":"14"}},{"c":{"d_3":"14"}},{"c":{"d_4":"14"}}], [{"c":{"d_0":"15"}}], [{"c":{"d_0":"16"}},{"c":{"d_1":"16"}}], [{"c":{"d_0":"17"}},{"c":{"d_1":"17"}},{"c":{"d_2":"17"}}], [{"c":{"d_0":"18"}},{"c":{"d_1":"18"}},{"c":{"d_2":"18"}},{"c":{"d_3":"18"}}], [{"c":{"d_0":"19"}},{"c":{"d_1":"19"}},{"c":{"d_2":"19"}},{"c":{"d_3":"19"}},{"c":{"d_4":"19"}}]], + "json.a.r.:`Array(JSON)`.^`b`.c": [[], [], [], [], [], [], [], [], [], [], [{"d_0":"10"}], [{"d_0":"11"},{"d_1":"11"}], [{"d_0":"12"},{"d_1":"12"},{"d_2":"12"}], [{"d_0":"13"},{"d_1":"13"},{"d_2":"13"},{"d_3":"13"}], [{"d_0":"14"},{"d_1":"14"},{"d_2":"14"},{"d_3":"14"},{"d_4":"14"}], [{"d_0":"15"}], [{"d_0":"16"},{"d_1":"16"}], [{"d_0":"17"},{"d_1":"17"},{"d_2":"17"}], [{"d_0":"18"},{"d_1":"18"},{"d_2":"18"},{"d_3":"18"}], [{"d_0":"19"},{"d_1":"19"},{"d_2":"19"},{"d_3":"19"},{"d_4":"19"}]], + "json.a.r.:`Array(JSON)`.b.c.d_0.:`Int64`": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]] +} +('a.a1','String') +('a.a2','String') +('a.a3','String') +('a.a4','String') +('a.a5','String') +('a.a6','String') +('a.a7','String') +('a.a8','String') +('a.b','Array(JSON)') +('a.r','Array(JSON(max_dynamic_types=16, max_dynamic_paths=2))') +('b.c.d_0','Int64') +('b.c.d_1','Int64') +('b.c.d_2','Int64') +('b.c.d_3','Int64') +('b.c.d_4','Int64') +('c.d.e','Array(Nullable(Int64))') +('b.c.d_0','Int64') +('b.c.d_1','Int64') +('b.c.d_2','Int64') +('b.c.d_3','Int64') +('b.c.d_4','Int64') +('c.d.e','Array(Nullable(Int64))') +{ + "json": [{"a":{"b":[]}}, {"a":{"b":[]}}, {"a":{"b":[]}}, {"a":{"b":[]}}, {"a":{"b":[]}}, {"a":{"b":[{"b":{"c":{"d_0":"5"}},"c":{"d":{"e":["0"]}}}]}}, {"a":{"b":[{"b":{"c":{"d_0":"6"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"6"}},"c":{"d":{"e":["0","1","2"]}}}]}}, {"a":{"b":[{"b":{"c":{"d_0":"7"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"7"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"7"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}}, {"a":{"b":[{"b":{"c":{"d_0":"8"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"8"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"8"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"8"}},"c":{"d":{"e":["0","1"]}}}]}}, {"a":{"b":[{"b":{"c":{"d_0":"9"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"9"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"9"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"9"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"9"}},"c":{"d":{"e":["0","1","2","3"]}}}]}}, {"a":{"b":[],"r":[{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}]}}, {"a":{"b":[],"r":[{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}]}}, {"a":{"b":[],"r":[{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}}, {"a":{"b":[],"r":[{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}]}}, {"a":{"b":[],"r":[{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}]}}, {"a":{"a1":"15","a2":"15","a3":"15","a4":"15","a5":"15","a6":"15","a7":"15","a8":"15","b":[],"r":[{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}]}}, {"a":{"a1":"16","a2":"16","a3":"16","a4":"16","a5":"16","a6":"16","a7":"16","a8":"16","b":[],"r":[{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}]}}, {"a":{"a1":"17","a2":"17","a3":"17","a4":"17","a5":"17","a6":"17","a7":"17","a8":"17","b":[],"r":[{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}}, {"a":{"a1":"18","a2":"18","a3":"18","a4":"18","a5":"18","a6":"18","a7":"18","a8":"18","b":[],"r":[{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}]}}, {"a":{"a1":"19","a2":"19","a3":"19","a4":"19","a5":"19","a6":"19","a7":"19","a8":"19","b":[],"r":[{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]}}], + "json.a.b": [[], [], [], [], [], [{"b":{"c":{"d_0":"5"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"6"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"6"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"7"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"7"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"7"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"8"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"8"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"8"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"8"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"9"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"9"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"9"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"9"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"9"}},"c":{"d":{"e":["0","1","2","3"]}}}], [], [], [], [], [], [], [], [], [], []], + "json.a.b.c": [[], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,null], [null,null,null,null,null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.c.d.e": [[], [], [], [], [], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_0": [[], [], [], [], [], ["5"], ["6",null], ["7",null,null], ["8",null,null,null], ["9",null,null,null,null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_1": [[], [], [], [], [], [null], [null,"6"], [null,"7",null], [null,"8",null,null], [null,"9",null,null,null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_2": [[], [], [], [], [], [null], [null,null], [null,null,"7"], [null,null,"8",null], [null,null,"9",null,null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_3": [[], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,"8"], [null,null,null,"9",null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_4": [[], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,null], [null,null,null,null,"9"], [], [], [], [], [], [], [], [], [], []], + "json.a.r": [null, null, null, null, null, null, null, null, null, null, [{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}], [{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]], + "json.a.r.:`Array(JSON)`": [[], [], [], [], [], [], [], [], [], [], [{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}], [{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]], + "json.a.r.:`Array(JSON)`.c.d.e": [[], [], [], [], [], [], [], [], [], [], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]]], + "json.a.r.:`Array(JSON)`.b.c.d_0": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_1": [[], [], [], [], [], [], [], [], [], [], [null], [null,"11"], [null,"12",null], [null,"13",null,null], [null,"14",null,null,null], [null], [null,"16"], [null,"17",null], [null,"18",null,null], [null,"19",null,null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_2": [[], [], [], [], [], [], [], [], [], [], [null], [null,null], [null,null,"12"], [null,null,"13",null], [null,null,"14",null,null], [null], [null,null], [null,null,"17"], [null,null,"18",null], [null,null,"19",null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_3": [[], [], [], [], [], [], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,"13"], [null,null,null,"14",null], [null], [null,null], [null,null,null], [null,null,null,"18"], [null,null,null,"19",null]], + "json.a.r.:`Array(JSON)`.b.c.d_4": [[], [], [], [], [], [], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,null], [null,null,null,null,"14"], [null], [null,null], [null,null,null], [null,null,null,null], [null,null,null,null,"19"]], + "json.^`a`": [{"b":[]}, {"b":[]}, {"b":[]}, {"b":[]}, {"b":[]}, {"b":[{"b":{"c":{"d_0":"5"}},"c":{"d":{"e":["0"]}}}]}, {"b":[{"b":{"c":{"d_0":"6"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"6"}},"c":{"d":{"e":["0","1","2"]}}}]}, {"b":[{"b":{"c":{"d_0":"7"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"7"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"7"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}, {"b":[{"b":{"c":{"d_0":"8"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"8"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"8"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"8"}},"c":{"d":{"e":["0","1"]}}}]}, {"b":[{"b":{"c":{"d_0":"9"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"9"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"9"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"9"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"9"}},"c":{"d":{"e":["0","1","2","3"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}]}, {"a1":"15","a2":"15","a3":"15","a4":"15","a5":"15","a6":"15","a7":"15","a8":"15","b":[],"r":[{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}]}, {"a1":"16","a2":"16","a3":"16","a4":"16","a5":"16","a6":"16","a7":"16","a8":"16","b":[],"r":[{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}]}, {"a1":"17","a2":"17","a3":"17","a4":"17","a5":"17","a6":"17","a7":"17","a8":"17","b":[],"r":[{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}, {"a1":"18","a2":"18","a3":"18","a4":"18","a5":"18","a6":"18","a7":"18","a8":"18","b":[],"r":[{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}]}, {"a1":"19","a2":"19","a3":"19","a4":"19","a5":"19","a6":"19","a7":"19","a8":"19","b":[],"r":[{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]}], + "json.a.b.^`b`.c": [[], [], [], [], [], [{"d_0":"5"}], [{"d_0":"6"},{"d_1":"6"}], [{"d_0":"7"},{"d_1":"7"},{"d_2":"7"}], [{"d_0":"8"},{"d_1":"8"},{"d_2":"8"},{"d_3":"8"}], [{"d_0":"9"},{"d_1":"9"},{"d_2":"9"},{"d_3":"9"},{"d_4":"9"}], [], [], [], [], [], [], [], [], [], []], + "json.a.r.:`Array(JSON)`.^`b`.c": [[], [], [], [], [], [], [], [], [], [], [{"d_0":"10"}], [{"d_0":"11"},{"d_1":"11"}], [{"d_0":"12"},{"d_1":"12"},{"d_2":"12"}], [{"d_0":"13"},{"d_1":"13"},{"d_2":"13"},{"d_3":"13"}], [{"d_0":"14"},{"d_1":"14"},{"d_2":"14"},{"d_3":"14"},{"d_4":"14"}], [{"d_0":"15"}], [{"d_0":"16"},{"d_1":"16"}], [{"d_0":"17"},{"d_1":"17"},{"d_2":"17"}], [{"d_0":"18"},{"d_1":"18"},{"d_2":"18"},{"d_3":"18"}], [{"d_0":"19"},{"d_1":"19"},{"d_2":"19"},{"d_3":"19"},{"d_4":"19"}]] +} +{ + "json.a.b": [[], [], [], [], [], [{"b":{"c":{"d_0":"5"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"6"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"6"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"7"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"7"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"7"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"8"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"8"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"8"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"8"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"9"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"9"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"9"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"9"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"9"}},"c":{"d":{"e":["0","1","2","3"]}}}], [], [], [], [], [], [], [], [], [], []], + "json.a.b.c": [[], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,null], [null,null,null,null,null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.c.d.e": [[], [], [], [], [], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_0": [[], [], [], [], [], ["5"], ["6",null], ["7",null,null], ["8",null,null,null], ["9",null,null,null,null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_1": [[], [], [], [], [], [null], [null,"6"], [null,"7",null], [null,"8",null,null], [null,"9",null,null,null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_2": [[], [], [], [], [], [null], [null,null], [null,null,"7"], [null,null,"8",null], [null,null,"9",null,null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_3": [[], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,"8"], [null,null,null,"9",null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_4": [[], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,null], [null,null,null,null,"9"], [], [], [], [], [], [], [], [], [], []], + "json.a.r": [null, null, null, null, null, null, null, null, null, null, [{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}], [{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]], + "json.a.r.:`Array(JSON)`": [[], [], [], [], [], [], [], [], [], [], [{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}], [{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]], + "json.a.r.:`Array(JSON)`.c.d.e": [[], [], [], [], [], [], [], [], [], [], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]]], + "json.a.r.:`Array(JSON)`.b.c.d_0": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_1": [[], [], [], [], [], [], [], [], [], [], [null], [null,"11"], [null,"12",null], [null,"13",null,null], [null,"14",null,null,null], [null], [null,"16"], [null,"17",null], [null,"18",null,null], [null,"19",null,null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_2": [[], [], [], [], [], [], [], [], [], [], [null], [null,null], [null,null,"12"], [null,null,"13",null], [null,null,"14",null,null], [null], [null,null], [null,null,"17"], [null,null,"18",null], [null,null,"19",null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_3": [[], [], [], [], [], [], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,"13"], [null,null,null,"14",null], [null], [null,null], [null,null,null], [null,null,null,"18"], [null,null,null,"19",null]], + "json.a.r.:`Array(JSON)`.b.c.d_4": [[], [], [], [], [], [], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,null], [null,null,null,null,"14"], [null], [null,null], [null,null,null], [null,null,null,null], [null,null,null,null,"19"]], + "json.^`a`": [{"b":[]}, {"b":[]}, {"b":[]}, {"b":[]}, {"b":[]}, {"b":[{"b":{"c":{"d_0":"5"}},"c":{"d":{"e":["0"]}}}]}, {"b":[{"b":{"c":{"d_0":"6"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"6"}},"c":{"d":{"e":["0","1","2"]}}}]}, {"b":[{"b":{"c":{"d_0":"7"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"7"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"7"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}, {"b":[{"b":{"c":{"d_0":"8"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"8"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"8"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"8"}},"c":{"d":{"e":["0","1"]}}}]}, {"b":[{"b":{"c":{"d_0":"9"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"9"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"9"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"9"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"9"}},"c":{"d":{"e":["0","1","2","3"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}]}, {"a1":"15","a2":"15","a3":"15","a4":"15","a5":"15","a6":"15","a7":"15","a8":"15","b":[],"r":[{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}]}, {"a1":"16","a2":"16","a3":"16","a4":"16","a5":"16","a6":"16","a7":"16","a8":"16","b":[],"r":[{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}]}, {"a1":"17","a2":"17","a3":"17","a4":"17","a5":"17","a6":"17","a7":"17","a8":"17","b":[],"r":[{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}, {"a1":"18","a2":"18","a3":"18","a4":"18","a5":"18","a6":"18","a7":"18","a8":"18","b":[],"r":[{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}]}, {"a1":"19","a2":"19","a3":"19","a4":"19","a5":"19","a6":"19","a7":"19","a8":"19","b":[],"r":[{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]}], + "json.a.b.^`b`.c": [[], [], [], [], [], [{"d_0":"5"}], [{"d_0":"6"},{"d_1":"6"}], [{"d_0":"7"},{"d_1":"7"},{"d_2":"7"}], [{"d_0":"8"},{"d_1":"8"},{"d_2":"8"},{"d_3":"8"}], [{"d_0":"9"},{"d_1":"9"},{"d_2":"9"},{"d_3":"9"},{"d_4":"9"}], [], [], [], [], [], [], [], [], [], []], + "json.a.r.:`Array(JSON)`.^`b`.c": [[], [], [], [], [], [], [], [], [], [], [{"d_0":"10"}], [{"d_0":"11"},{"d_1":"11"}], [{"d_0":"12"},{"d_1":"12"},{"d_2":"12"}], [{"d_0":"13"},{"d_1":"13"},{"d_2":"13"},{"d_3":"13"}], [{"d_0":"14"},{"d_1":"14"},{"d_2":"14"},{"d_3":"14"},{"d_4":"14"}], [{"d_0":"15"}], [{"d_0":"16"},{"d_1":"16"}], [{"d_0":"17"},{"d_1":"17"},{"d_2":"17"}], [{"d_0":"18"},{"d_1":"18"},{"d_2":"18"},{"d_3":"18"}], [{"d_0":"19"},{"d_1":"19"},{"d_2":"19"},{"d_3":"19"},{"d_4":"19"}]] +} +{ + "json.a.r.:`Array(JSON)`.c.d.e": [[], [], [], [], [], [], [], [], [], [], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]]], + "json.a.r.:`Array(JSON)`.b.c.d_0": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_1": [[], [], [], [], [], [], [], [], [], [], [null], [null,"11"], [null,"12",null], [null,"13",null,null], [null,"14",null,null,null], [null], [null,"16"], [null,"17",null], [null,"18",null,null], [null,"19",null,null,null]] +} +{ + "json.a.r.:`Array(JSON)`.c.d.e.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]]], + "json.a.r.:`Array(JSON)`.b.c.d_0.:`Int64`": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_1.:`Int64`": [[], [], [], [], [], [], [], [], [], [], [null], [null,"11"], [null,"12",null], [null,"13",null,null], [null,"14",null,null,null], [null], [null,"16"], [null,"17",null], [null,"18",null,null], [null,"19",null,null,null]] +} +{ + "json.a.r": [null, null, null, null, null, null, null, null, null, null, [{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}], [{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]], + "json.a.r.:`Array(JSON)`.c.d.e": [[], [], [], [], [], [], [], [], [], [], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]]], + "json.a.r.:`Array(JSON)`.b.c.d_0": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_1": [[], [], [], [], [], [], [], [], [], [], [null], [null,"11"], [null,"12",null], [null,"13",null,null], [null,"14",null,null,null], [null], [null,"16"], [null,"17",null], [null,"18",null,null], [null,"19",null,null,null]] +} +{ + "json.a.r": [null, null, null, null, null, null, null, null, null, null, [{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}], [{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]], + "json.a.r.:`Array(JSON)`.c.d.e.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]]], + "json.a.r.:`Array(JSON)`.b.c.d_0.:`Int64`": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_1.:`Int64`": [[], [], [], [], [], [], [], [], [], [], [null], [null,"11"], [null,"12",null], [null,"13",null,null], [null,"14",null,null,null], [null], [null,"16"], [null,"17",null], [null,"18",null,null], [null,"19",null,null,null]] +} +{ + "json.a.r.:`Array(JSON)`.^`b`": [[], [], [], [], [], [], [], [], [], [], [{"c":{"d_0":"10"}}], [{"c":{"d_0":"11"}},{"c":{"d_1":"11"}}], [{"c":{"d_0":"12"}},{"c":{"d_1":"12"}},{"c":{"d_2":"12"}}], [{"c":{"d_0":"13"}},{"c":{"d_1":"13"}},{"c":{"d_2":"13"}},{"c":{"d_3":"13"}}], [{"c":{"d_0":"14"}},{"c":{"d_1":"14"}},{"c":{"d_2":"14"}},{"c":{"d_3":"14"}},{"c":{"d_4":"14"}}], [{"c":{"d_0":"15"}}], [{"c":{"d_0":"16"}},{"c":{"d_1":"16"}}], [{"c":{"d_0":"17"}},{"c":{"d_1":"17"}},{"c":{"d_2":"17"}}], [{"c":{"d_0":"18"}},{"c":{"d_1":"18"}},{"c":{"d_2":"18"}},{"c":{"d_3":"18"}}], [{"c":{"d_0":"19"}},{"c":{"d_1":"19"}},{"c":{"d_2":"19"}},{"c":{"d_3":"19"}},{"c":{"d_4":"19"}}]], + "json.a.r.:`Array(JSON)`.^`b`.c": [[], [], [], [], [], [], [], [], [], [], [{"d_0":"10"}], [{"d_0":"11"},{"d_1":"11"}], [{"d_0":"12"},{"d_1":"12"},{"d_2":"12"}], [{"d_0":"13"},{"d_1":"13"},{"d_2":"13"},{"d_3":"13"}], [{"d_0":"14"},{"d_1":"14"},{"d_2":"14"},{"d_3":"14"},{"d_4":"14"}], [{"d_0":"15"}], [{"d_0":"16"},{"d_1":"16"}], [{"d_0":"17"},{"d_1":"17"},{"d_2":"17"}], [{"d_0":"18"},{"d_1":"18"},{"d_2":"18"},{"d_3":"18"}], [{"d_0":"19"},{"d_1":"19"},{"d_2":"19"},{"d_3":"19"},{"d_4":"19"}]], + "json.a.r.:`Array(JSON)`.b.c.d_0": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]] +} +{ + "json.a.r.:`Array(JSON)`.^`b`": [[], [], [], [], [], [], [], [], [], [], [{"c":{"d_0":"10"}}], [{"c":{"d_0":"11"}},{"c":{"d_1":"11"}}], [{"c":{"d_0":"12"}},{"c":{"d_1":"12"}},{"c":{"d_2":"12"}}], [{"c":{"d_0":"13"}},{"c":{"d_1":"13"}},{"c":{"d_2":"13"}},{"c":{"d_3":"13"}}], [{"c":{"d_0":"14"}},{"c":{"d_1":"14"}},{"c":{"d_2":"14"}},{"c":{"d_3":"14"}},{"c":{"d_4":"14"}}], [{"c":{"d_0":"15"}}], [{"c":{"d_0":"16"}},{"c":{"d_1":"16"}}], [{"c":{"d_0":"17"}},{"c":{"d_1":"17"}},{"c":{"d_2":"17"}}], [{"c":{"d_0":"18"}},{"c":{"d_1":"18"}},{"c":{"d_2":"18"}},{"c":{"d_3":"18"}}], [{"c":{"d_0":"19"}},{"c":{"d_1":"19"}},{"c":{"d_2":"19"}},{"c":{"d_3":"19"}},{"c":{"d_4":"19"}}]], + "json.a.r.:`Array(JSON)`.^`b`.c": [[], [], [], [], [], [], [], [], [], [], [{"d_0":"10"}], [{"d_0":"11"},{"d_1":"11"}], [{"d_0":"12"},{"d_1":"12"},{"d_2":"12"}], [{"d_0":"13"},{"d_1":"13"},{"d_2":"13"},{"d_3":"13"}], [{"d_0":"14"},{"d_1":"14"},{"d_2":"14"},{"d_3":"14"},{"d_4":"14"}], [{"d_0":"15"}], [{"d_0":"16"},{"d_1":"16"}], [{"d_0":"17"},{"d_1":"17"},{"d_2":"17"}], [{"d_0":"18"},{"d_1":"18"},{"d_2":"18"},{"d_3":"18"}], [{"d_0":"19"},{"d_1":"19"},{"d_2":"19"},{"d_3":"19"},{"d_4":"19"}]], + "json.a.r.:`Array(JSON)`.b.c.d_0.:`Int64`": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]] +} +{ + "json.a.r": [null, null, null, null, null, null, null, null, null, null, [{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}], [{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]], + "json.a.r.:`Array(JSON)`.^`b`": [[], [], [], [], [], [], [], [], [], [], [{"c":{"d_0":"10"}}], [{"c":{"d_0":"11"}},{"c":{"d_1":"11"}}], [{"c":{"d_0":"12"}},{"c":{"d_1":"12"}},{"c":{"d_2":"12"}}], [{"c":{"d_0":"13"}},{"c":{"d_1":"13"}},{"c":{"d_2":"13"}},{"c":{"d_3":"13"}}], [{"c":{"d_0":"14"}},{"c":{"d_1":"14"}},{"c":{"d_2":"14"}},{"c":{"d_3":"14"}},{"c":{"d_4":"14"}}], [{"c":{"d_0":"15"}}], [{"c":{"d_0":"16"}},{"c":{"d_1":"16"}}], [{"c":{"d_0":"17"}},{"c":{"d_1":"17"}},{"c":{"d_2":"17"}}], [{"c":{"d_0":"18"}},{"c":{"d_1":"18"}},{"c":{"d_2":"18"}},{"c":{"d_3":"18"}}], [{"c":{"d_0":"19"}},{"c":{"d_1":"19"}},{"c":{"d_2":"19"}},{"c":{"d_3":"19"}},{"c":{"d_4":"19"}}]], + "json.a.r.:`Array(JSON)`.^`b`.c": [[], [], [], [], [], [], [], [], [], [], [{"d_0":"10"}], [{"d_0":"11"},{"d_1":"11"}], [{"d_0":"12"},{"d_1":"12"},{"d_2":"12"}], [{"d_0":"13"},{"d_1":"13"},{"d_2":"13"},{"d_3":"13"}], [{"d_0":"14"},{"d_1":"14"},{"d_2":"14"},{"d_3":"14"},{"d_4":"14"}], [{"d_0":"15"}], [{"d_0":"16"},{"d_1":"16"}], [{"d_0":"17"},{"d_1":"17"},{"d_2":"17"}], [{"d_0":"18"},{"d_1":"18"},{"d_2":"18"},{"d_3":"18"}], [{"d_0":"19"},{"d_1":"19"},{"d_2":"19"},{"d_3":"19"},{"d_4":"19"}]], + "json.a.r.:`Array(JSON)`.b.c.d_0": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]] +} +{ + "json.a.r": [null, null, null, null, null, null, null, null, null, null, [{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}], [{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]], + "json.a.r.:`Array(JSON)`.^`b`": [[], [], [], [], [], [], [], [], [], [], [{"c":{"d_0":"10"}}], [{"c":{"d_0":"11"}},{"c":{"d_1":"11"}}], [{"c":{"d_0":"12"}},{"c":{"d_1":"12"}},{"c":{"d_2":"12"}}], [{"c":{"d_0":"13"}},{"c":{"d_1":"13"}},{"c":{"d_2":"13"}},{"c":{"d_3":"13"}}], [{"c":{"d_0":"14"}},{"c":{"d_1":"14"}},{"c":{"d_2":"14"}},{"c":{"d_3":"14"}},{"c":{"d_4":"14"}}], [{"c":{"d_0":"15"}}], [{"c":{"d_0":"16"}},{"c":{"d_1":"16"}}], [{"c":{"d_0":"17"}},{"c":{"d_1":"17"}},{"c":{"d_2":"17"}}], [{"c":{"d_0":"18"}},{"c":{"d_1":"18"}},{"c":{"d_2":"18"}},{"c":{"d_3":"18"}}], [{"c":{"d_0":"19"}},{"c":{"d_1":"19"}},{"c":{"d_2":"19"}},{"c":{"d_3":"19"}},{"c":{"d_4":"19"}}]], + "json.a.r.:`Array(JSON)`.^`b`.c": [[], [], [], [], [], [], [], [], [], [], [{"d_0":"10"}], [{"d_0":"11"},{"d_1":"11"}], [{"d_0":"12"},{"d_1":"12"},{"d_2":"12"}], [{"d_0":"13"},{"d_1":"13"},{"d_2":"13"},{"d_3":"13"}], [{"d_0":"14"},{"d_1":"14"},{"d_2":"14"},{"d_3":"14"},{"d_4":"14"}], [{"d_0":"15"}], [{"d_0":"16"},{"d_1":"16"}], [{"d_0":"17"},{"d_1":"17"},{"d_2":"17"}], [{"d_0":"18"},{"d_1":"18"},{"d_2":"18"},{"d_3":"18"}], [{"d_0":"19"},{"d_1":"19"},{"d_2":"19"},{"d_3":"19"},{"d_4":"19"}]], + "json.a.r.:`Array(JSON)`.b.c.d_0.:`Int64`": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]] +} +('a.a1','String') +('a.a2','String') +('a.a3','String') +('a.a4','String') +('a.a5','String') +('a.a6','String') +('a.a7','String') +('a.a8','String') +('a.b','Array(JSON)') +('a.r','Array(JSON(max_dynamic_types=16, max_dynamic_paths=2))') +('b.c.d_0','Int64') +('b.c.d_1','Int64') +('b.c.d_2','Int64') +('b.c.d_3','Int64') +('b.c.d_4','Int64') +('c.d.e','Array(Nullable(Int64))') +('b.c.d_0','Int64') +('b.c.d_1','Int64') +('b.c.d_2','Int64') +('b.c.d_3','Int64') +('b.c.d_4','Int64') +('c.d.e','Array(Nullable(Int64))') +{ + "json": [{"a":{"b":[]}}, {"a":{"b":[]}}, {"a":{"b":[]}}, {"a":{"b":[]}}, {"a":{"b":[]}}, {"a":{"b":[{"b":{"c":{"d_0":"5"}},"c":{"d":{"e":["0"]}}}]}}, {"a":{"b":[{"b":{"c":{"d_0":"6"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"6"}},"c":{"d":{"e":["0","1","2"]}}}]}}, {"a":{"b":[{"b":{"c":{"d_0":"7"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"7"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"7"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}}, {"a":{"b":[{"b":{"c":{"d_0":"8"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"8"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"8"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"8"}},"c":{"d":{"e":["0","1"]}}}]}}, {"a":{"b":[{"b":{"c":{"d_0":"9"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"9"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"9"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"9"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"9"}},"c":{"d":{"e":["0","1","2","3"]}}}]}}, {"a":{"b":[],"r":[{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}]}}, {"a":{"b":[],"r":[{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}]}}, {"a":{"b":[],"r":[{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}}, {"a":{"b":[],"r":[{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}]}}, {"a":{"b":[],"r":[{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}]}}, {"a":{"a1":"15","a2":"15","a3":"15","a4":"15","a5":"15","a6":"15","a7":"15","a8":"15","b":[],"r":[{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}]}}, {"a":{"a1":"16","a2":"16","a3":"16","a4":"16","a5":"16","a6":"16","a7":"16","a8":"16","b":[],"r":[{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}]}}, {"a":{"a1":"17","a2":"17","a3":"17","a4":"17","a5":"17","a6":"17","a7":"17","a8":"17","b":[],"r":[{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}}, {"a":{"a1":"18","a2":"18","a3":"18","a4":"18","a5":"18","a6":"18","a7":"18","a8":"18","b":[],"r":[{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}]}}, {"a":{"a1":"19","a2":"19","a3":"19","a4":"19","a5":"19","a6":"19","a7":"19","a8":"19","b":[],"r":[{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]}}], + "json.a.b": [[], [], [], [], [], [{"b":{"c":{"d_0":"5"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"6"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"6"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"7"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"7"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"7"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"8"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"8"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"8"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"8"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"9"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"9"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"9"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"9"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"9"}},"c":{"d":{"e":["0","1","2","3"]}}}], [], [], [], [], [], [], [], [], [], []], + "json.a.b.c": [[], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,null], [null,null,null,null,null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.c.d.e": [[], [], [], [], [], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_0": [[], [], [], [], [], ["5"], ["6",null], ["7",null,null], ["8",null,null,null], ["9",null,null,null,null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_1": [[], [], [], [], [], [null], [null,"6"], [null,"7",null], [null,"8",null,null], [null,"9",null,null,null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_2": [[], [], [], [], [], [null], [null,null], [null,null,"7"], [null,null,"8",null], [null,null,"9",null,null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_3": [[], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,"8"], [null,null,null,"9",null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_4": [[], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,null], [null,null,null,null,"9"], [], [], [], [], [], [], [], [], [], []], + "json.a.r": [null, null, null, null, null, null, null, null, null, null, [{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}], [{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]], + "json.a.r.:`Array(JSON)`": [[], [], [], [], [], [], [], [], [], [], [{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}], [{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]], + "json.a.r.:`Array(JSON)`.c.d.e": [[], [], [], [], [], [], [], [], [], [], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]]], + "json.a.r.:`Array(JSON)`.b.c.d_0": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_1": [[], [], [], [], [], [], [], [], [], [], [null], [null,"11"], [null,"12",null], [null,"13",null,null], [null,"14",null,null,null], [null], [null,"16"], [null,"17",null], [null,"18",null,null], [null,"19",null,null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_2": [[], [], [], [], [], [], [], [], [], [], [null], [null,null], [null,null,"12"], [null,null,"13",null], [null,null,"14",null,null], [null], [null,null], [null,null,"17"], [null,null,"18",null], [null,null,"19",null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_3": [[], [], [], [], [], [], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,"13"], [null,null,null,"14",null], [null], [null,null], [null,null,null], [null,null,null,"18"], [null,null,null,"19",null]], + "json.a.r.:`Array(JSON)`.b.c.d_4": [[], [], [], [], [], [], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,null], [null,null,null,null,"14"], [null], [null,null], [null,null,null], [null,null,null,null], [null,null,null,null,"19"]], + "json.^`a`": [{"b":[]}, {"b":[]}, {"b":[]}, {"b":[]}, {"b":[]}, {"b":[{"b":{"c":{"d_0":"5"}},"c":{"d":{"e":["0"]}}}]}, {"b":[{"b":{"c":{"d_0":"6"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"6"}},"c":{"d":{"e":["0","1","2"]}}}]}, {"b":[{"b":{"c":{"d_0":"7"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"7"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"7"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}, {"b":[{"b":{"c":{"d_0":"8"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"8"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"8"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"8"}},"c":{"d":{"e":["0","1"]}}}]}, {"b":[{"b":{"c":{"d_0":"9"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"9"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"9"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"9"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"9"}},"c":{"d":{"e":["0","1","2","3"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}]}, {"a1":"15","a2":"15","a3":"15","a4":"15","a5":"15","a6":"15","a7":"15","a8":"15","b":[],"r":[{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}]}, {"a1":"16","a2":"16","a3":"16","a4":"16","a5":"16","a6":"16","a7":"16","a8":"16","b":[],"r":[{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}]}, {"a1":"17","a2":"17","a3":"17","a4":"17","a5":"17","a6":"17","a7":"17","a8":"17","b":[],"r":[{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}, {"a1":"18","a2":"18","a3":"18","a4":"18","a5":"18","a6":"18","a7":"18","a8":"18","b":[],"r":[{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}]}, {"a1":"19","a2":"19","a3":"19","a4":"19","a5":"19","a6":"19","a7":"19","a8":"19","b":[],"r":[{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]}], + "json.a.b.^`b`.c": [[], [], [], [], [], [{"d_0":"5"}], [{"d_0":"6"},{"d_1":"6"}], [{"d_0":"7"},{"d_1":"7"},{"d_2":"7"}], [{"d_0":"8"},{"d_1":"8"},{"d_2":"8"},{"d_3":"8"}], [{"d_0":"9"},{"d_1":"9"},{"d_2":"9"},{"d_3":"9"},{"d_4":"9"}], [], [], [], [], [], [], [], [], [], []], + "json.a.r.:`Array(JSON)`.^`b`.c": [[], [], [], [], [], [], [], [], [], [], [{"d_0":"10"}], [{"d_0":"11"},{"d_1":"11"}], [{"d_0":"12"},{"d_1":"12"},{"d_2":"12"}], [{"d_0":"13"},{"d_1":"13"},{"d_2":"13"},{"d_3":"13"}], [{"d_0":"14"},{"d_1":"14"},{"d_2":"14"},{"d_3":"14"},{"d_4":"14"}], [{"d_0":"15"}], [{"d_0":"16"},{"d_1":"16"}], [{"d_0":"17"},{"d_1":"17"},{"d_2":"17"}], [{"d_0":"18"},{"d_1":"18"},{"d_2":"18"},{"d_3":"18"}], [{"d_0":"19"},{"d_1":"19"},{"d_2":"19"},{"d_3":"19"},{"d_4":"19"}]] +} +{ + "json.a.b": [[], [], [], [], [], [{"b":{"c":{"d_0":"5"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"6"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"6"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"7"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"7"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"7"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"8"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"8"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"8"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"8"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"9"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"9"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"9"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"9"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"9"}},"c":{"d":{"e":["0","1","2","3"]}}}], [], [], [], [], [], [], [], [], [], []], + "json.a.b.c": [[], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,null], [null,null,null,null,null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.c.d.e": [[], [], [], [], [], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_0": [[], [], [], [], [], ["5"], ["6",null], ["7",null,null], ["8",null,null,null], ["9",null,null,null,null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_1": [[], [], [], [], [], [null], [null,"6"], [null,"7",null], [null,"8",null,null], [null,"9",null,null,null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_2": [[], [], [], [], [], [null], [null,null], [null,null,"7"], [null,null,"8",null], [null,null,"9",null,null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_3": [[], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,"8"], [null,null,null,"9",null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_4": [[], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,null], [null,null,null,null,"9"], [], [], [], [], [], [], [], [], [], []], + "json.a.r": [null, null, null, null, null, null, null, null, null, null, [{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}], [{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]], + "json.a.r.:`Array(JSON)`": [[], [], [], [], [], [], [], [], [], [], [{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}], [{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]], + "json.a.r.:`Array(JSON)`.c.d.e": [[], [], [], [], [], [], [], [], [], [], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]]], + "json.a.r.:`Array(JSON)`.b.c.d_0": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_1": [[], [], [], [], [], [], [], [], [], [], [null], [null,"11"], [null,"12",null], [null,"13",null,null], [null,"14",null,null,null], [null], [null,"16"], [null,"17",null], [null,"18",null,null], [null,"19",null,null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_2": [[], [], [], [], [], [], [], [], [], [], [null], [null,null], [null,null,"12"], [null,null,"13",null], [null,null,"14",null,null], [null], [null,null], [null,null,"17"], [null,null,"18",null], [null,null,"19",null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_3": [[], [], [], [], [], [], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,"13"], [null,null,null,"14",null], [null], [null,null], [null,null,null], [null,null,null,"18"], [null,null,null,"19",null]], + "json.a.r.:`Array(JSON)`.b.c.d_4": [[], [], [], [], [], [], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,null], [null,null,null,null,"14"], [null], [null,null], [null,null,null], [null,null,null,null], [null,null,null,null,"19"]], + "json.^`a`": [{"b":[]}, {"b":[]}, {"b":[]}, {"b":[]}, {"b":[]}, {"b":[{"b":{"c":{"d_0":"5"}},"c":{"d":{"e":["0"]}}}]}, {"b":[{"b":{"c":{"d_0":"6"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"6"}},"c":{"d":{"e":["0","1","2"]}}}]}, {"b":[{"b":{"c":{"d_0":"7"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"7"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"7"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}, {"b":[{"b":{"c":{"d_0":"8"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"8"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"8"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"8"}},"c":{"d":{"e":["0","1"]}}}]}, {"b":[{"b":{"c":{"d_0":"9"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"9"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"9"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"9"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"9"}},"c":{"d":{"e":["0","1","2","3"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}]}, {"a1":"15","a2":"15","a3":"15","a4":"15","a5":"15","a6":"15","a7":"15","a8":"15","b":[],"r":[{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}]}, {"a1":"16","a2":"16","a3":"16","a4":"16","a5":"16","a6":"16","a7":"16","a8":"16","b":[],"r":[{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}]}, {"a1":"17","a2":"17","a3":"17","a4":"17","a5":"17","a6":"17","a7":"17","a8":"17","b":[],"r":[{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}, {"a1":"18","a2":"18","a3":"18","a4":"18","a5":"18","a6":"18","a7":"18","a8":"18","b":[],"r":[{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}]}, {"a1":"19","a2":"19","a3":"19","a4":"19","a5":"19","a6":"19","a7":"19","a8":"19","b":[],"r":[{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]}], + "json.a.b.^`b`.c": [[], [], [], [], [], [{"d_0":"5"}], [{"d_0":"6"},{"d_1":"6"}], [{"d_0":"7"},{"d_1":"7"},{"d_2":"7"}], [{"d_0":"8"},{"d_1":"8"},{"d_2":"8"},{"d_3":"8"}], [{"d_0":"9"},{"d_1":"9"},{"d_2":"9"},{"d_3":"9"},{"d_4":"9"}], [], [], [], [], [], [], [], [], [], []], + "json.a.r.:`Array(JSON)`.^`b`.c": [[], [], [], [], [], [], [], [], [], [], [{"d_0":"10"}], [{"d_0":"11"},{"d_1":"11"}], [{"d_0":"12"},{"d_1":"12"},{"d_2":"12"}], [{"d_0":"13"},{"d_1":"13"},{"d_2":"13"},{"d_3":"13"}], [{"d_0":"14"},{"d_1":"14"},{"d_2":"14"},{"d_3":"14"},{"d_4":"14"}], [{"d_0":"15"}], [{"d_0":"16"},{"d_1":"16"}], [{"d_0":"17"},{"d_1":"17"},{"d_2":"17"}], [{"d_0":"18"},{"d_1":"18"},{"d_2":"18"},{"d_3":"18"}], [{"d_0":"19"},{"d_1":"19"},{"d_2":"19"},{"d_3":"19"},{"d_4":"19"}]] +} +{ + "json.a.r.:`Array(JSON)`.c.d.e": [[], [], [], [], [], [], [], [], [], [], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]]], + "json.a.r.:`Array(JSON)`.b.c.d_0": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_1": [[], [], [], [], [], [], [], [], [], [], [null], [null,"11"], [null,"12",null], [null,"13",null,null], [null,"14",null,null,null], [null], [null,"16"], [null,"17",null], [null,"18",null,null], [null,"19",null,null,null]] +} +{ + "json.a.r.:`Array(JSON)`.c.d.e.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]]], + "json.a.r.:`Array(JSON)`.b.c.d_0.:`Int64`": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_1.:`Int64`": [[], [], [], [], [], [], [], [], [], [], [null], [null,"11"], [null,"12",null], [null,"13",null,null], [null,"14",null,null,null], [null], [null,"16"], [null,"17",null], [null,"18",null,null], [null,"19",null,null,null]] +} +{ + "json.a.r": [null, null, null, null, null, null, null, null, null, null, [{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}], [{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]], + "json.a.r.:`Array(JSON)`.c.d.e": [[], [], [], [], [], [], [], [], [], [], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]]], + "json.a.r.:`Array(JSON)`.b.c.d_0": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_1": [[], [], [], [], [], [], [], [], [], [], [null], [null,"11"], [null,"12",null], [null,"13",null,null], [null,"14",null,null,null], [null], [null,"16"], [null,"17",null], [null,"18",null,null], [null,"19",null,null,null]] +} +{ + "json.a.r": [null, null, null, null, null, null, null, null, null, null, [{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}], [{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]], + "json.a.r.:`Array(JSON)`.c.d.e.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]]], + "json.a.r.:`Array(JSON)`.b.c.d_0.:`Int64`": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_1.:`Int64`": [[], [], [], [], [], [], [], [], [], [], [null], [null,"11"], [null,"12",null], [null,"13",null,null], [null,"14",null,null,null], [null], [null,"16"], [null,"17",null], [null,"18",null,null], [null,"19",null,null,null]] +} +{ + "json.a.r.:`Array(JSON)`.^`b`": [[], [], [], [], [], [], [], [], [], [], [{"c":{"d_0":"10"}}], [{"c":{"d_0":"11"}},{"c":{"d_1":"11"}}], [{"c":{"d_0":"12"}},{"c":{"d_1":"12"}},{"c":{"d_2":"12"}}], [{"c":{"d_0":"13"}},{"c":{"d_1":"13"}},{"c":{"d_2":"13"}},{"c":{"d_3":"13"}}], [{"c":{"d_0":"14"}},{"c":{"d_1":"14"}},{"c":{"d_2":"14"}},{"c":{"d_3":"14"}},{"c":{"d_4":"14"}}], [{"c":{"d_0":"15"}}], [{"c":{"d_0":"16"}},{"c":{"d_1":"16"}}], [{"c":{"d_0":"17"}},{"c":{"d_1":"17"}},{"c":{"d_2":"17"}}], [{"c":{"d_0":"18"}},{"c":{"d_1":"18"}},{"c":{"d_2":"18"}},{"c":{"d_3":"18"}}], [{"c":{"d_0":"19"}},{"c":{"d_1":"19"}},{"c":{"d_2":"19"}},{"c":{"d_3":"19"}},{"c":{"d_4":"19"}}]], + "json.a.r.:`Array(JSON)`.^`b`.c": [[], [], [], [], [], [], [], [], [], [], [{"d_0":"10"}], [{"d_0":"11"},{"d_1":"11"}], [{"d_0":"12"},{"d_1":"12"},{"d_2":"12"}], [{"d_0":"13"},{"d_1":"13"},{"d_2":"13"},{"d_3":"13"}], [{"d_0":"14"},{"d_1":"14"},{"d_2":"14"},{"d_3":"14"},{"d_4":"14"}], [{"d_0":"15"}], [{"d_0":"16"},{"d_1":"16"}], [{"d_0":"17"},{"d_1":"17"},{"d_2":"17"}], [{"d_0":"18"},{"d_1":"18"},{"d_2":"18"},{"d_3":"18"}], [{"d_0":"19"},{"d_1":"19"},{"d_2":"19"},{"d_3":"19"},{"d_4":"19"}]], + "json.a.r.:`Array(JSON)`.b.c.d_0": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]] +} +{ + "json.a.r.:`Array(JSON)`.^`b`": [[], [], [], [], [], [], [], [], [], [], [{"c":{"d_0":"10"}}], [{"c":{"d_0":"11"}},{"c":{"d_1":"11"}}], [{"c":{"d_0":"12"}},{"c":{"d_1":"12"}},{"c":{"d_2":"12"}}], [{"c":{"d_0":"13"}},{"c":{"d_1":"13"}},{"c":{"d_2":"13"}},{"c":{"d_3":"13"}}], [{"c":{"d_0":"14"}},{"c":{"d_1":"14"}},{"c":{"d_2":"14"}},{"c":{"d_3":"14"}},{"c":{"d_4":"14"}}], [{"c":{"d_0":"15"}}], [{"c":{"d_0":"16"}},{"c":{"d_1":"16"}}], [{"c":{"d_0":"17"}},{"c":{"d_1":"17"}},{"c":{"d_2":"17"}}], [{"c":{"d_0":"18"}},{"c":{"d_1":"18"}},{"c":{"d_2":"18"}},{"c":{"d_3":"18"}}], [{"c":{"d_0":"19"}},{"c":{"d_1":"19"}},{"c":{"d_2":"19"}},{"c":{"d_3":"19"}},{"c":{"d_4":"19"}}]], + "json.a.r.:`Array(JSON)`.^`b`.c": [[], [], [], [], [], [], [], [], [], [], [{"d_0":"10"}], [{"d_0":"11"},{"d_1":"11"}], [{"d_0":"12"},{"d_1":"12"},{"d_2":"12"}], [{"d_0":"13"},{"d_1":"13"},{"d_2":"13"},{"d_3":"13"}], [{"d_0":"14"},{"d_1":"14"},{"d_2":"14"},{"d_3":"14"},{"d_4":"14"}], [{"d_0":"15"}], [{"d_0":"16"},{"d_1":"16"}], [{"d_0":"17"},{"d_1":"17"},{"d_2":"17"}], [{"d_0":"18"},{"d_1":"18"},{"d_2":"18"},{"d_3":"18"}], [{"d_0":"19"},{"d_1":"19"},{"d_2":"19"},{"d_3":"19"},{"d_4":"19"}]], + "json.a.r.:`Array(JSON)`.b.c.d_0.:`Int64`": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]] +} +{ + "json.a.r": [null, null, null, null, null, null, null, null, null, null, [{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}], [{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]], + "json.a.r.:`Array(JSON)`.^`b`": [[], [], [], [], [], [], [], [], [], [], [{"c":{"d_0":"10"}}], [{"c":{"d_0":"11"}},{"c":{"d_1":"11"}}], [{"c":{"d_0":"12"}},{"c":{"d_1":"12"}},{"c":{"d_2":"12"}}], [{"c":{"d_0":"13"}},{"c":{"d_1":"13"}},{"c":{"d_2":"13"}},{"c":{"d_3":"13"}}], [{"c":{"d_0":"14"}},{"c":{"d_1":"14"}},{"c":{"d_2":"14"}},{"c":{"d_3":"14"}},{"c":{"d_4":"14"}}], [{"c":{"d_0":"15"}}], [{"c":{"d_0":"16"}},{"c":{"d_1":"16"}}], [{"c":{"d_0":"17"}},{"c":{"d_1":"17"}},{"c":{"d_2":"17"}}], [{"c":{"d_0":"18"}},{"c":{"d_1":"18"}},{"c":{"d_2":"18"}},{"c":{"d_3":"18"}}], [{"c":{"d_0":"19"}},{"c":{"d_1":"19"}},{"c":{"d_2":"19"}},{"c":{"d_3":"19"}},{"c":{"d_4":"19"}}]], + "json.a.r.:`Array(JSON)`.^`b`.c": [[], [], [], [], [], [], [], [], [], [], [{"d_0":"10"}], [{"d_0":"11"},{"d_1":"11"}], [{"d_0":"12"},{"d_1":"12"},{"d_2":"12"}], [{"d_0":"13"},{"d_1":"13"},{"d_2":"13"},{"d_3":"13"}], [{"d_0":"14"},{"d_1":"14"},{"d_2":"14"},{"d_3":"14"},{"d_4":"14"}], [{"d_0":"15"}], [{"d_0":"16"},{"d_1":"16"}], [{"d_0":"17"},{"d_1":"17"},{"d_2":"17"}], [{"d_0":"18"},{"d_1":"18"},{"d_2":"18"},{"d_3":"18"}], [{"d_0":"19"},{"d_1":"19"},{"d_2":"19"},{"d_3":"19"},{"d_4":"19"}]], + "json.a.r.:`Array(JSON)`.b.c.d_0": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]] +} +{ + "json.a.r": [null, null, null, null, null, null, null, null, null, null, [{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}], [{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]], + "json.a.r.:`Array(JSON)`.^`b`": [[], [], [], [], [], [], [], [], [], [], [{"c":{"d_0":"10"}}], [{"c":{"d_0":"11"}},{"c":{"d_1":"11"}}], [{"c":{"d_0":"12"}},{"c":{"d_1":"12"}},{"c":{"d_2":"12"}}], [{"c":{"d_0":"13"}},{"c":{"d_1":"13"}},{"c":{"d_2":"13"}},{"c":{"d_3":"13"}}], [{"c":{"d_0":"14"}},{"c":{"d_1":"14"}},{"c":{"d_2":"14"}},{"c":{"d_3":"14"}},{"c":{"d_4":"14"}}], [{"c":{"d_0":"15"}}], [{"c":{"d_0":"16"}},{"c":{"d_1":"16"}}], [{"c":{"d_0":"17"}},{"c":{"d_1":"17"}},{"c":{"d_2":"17"}}], [{"c":{"d_0":"18"}},{"c":{"d_1":"18"}},{"c":{"d_2":"18"}},{"c":{"d_3":"18"}}], [{"c":{"d_0":"19"}},{"c":{"d_1":"19"}},{"c":{"d_2":"19"}},{"c":{"d_3":"19"}},{"c":{"d_4":"19"}}]], + "json.a.r.:`Array(JSON)`.^`b`.c": [[], [], [], [], [], [], [], [], [], [], [{"d_0":"10"}], [{"d_0":"11"},{"d_1":"11"}], [{"d_0":"12"},{"d_1":"12"},{"d_2":"12"}], [{"d_0":"13"},{"d_1":"13"},{"d_2":"13"},{"d_3":"13"}], [{"d_0":"14"},{"d_1":"14"},{"d_2":"14"},{"d_3":"14"},{"d_4":"14"}], [{"d_0":"15"}], [{"d_0":"16"},{"d_1":"16"}], [{"d_0":"17"},{"d_1":"17"},{"d_2":"17"}], [{"d_0":"18"},{"d_1":"18"},{"d_2":"18"},{"d_3":"18"}], [{"d_0":"19"},{"d_1":"19"},{"d_2":"19"},{"d_3":"19"},{"d_4":"19"}]], + "json.a.r.:`Array(JSON)`.b.c.d_0.:`Int64`": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]] +} +('a.a1','String') +('a.a2','String') +('a.a3','String') +('a.a4','String') +('a.a5','String') +('a.a6','String') +('a.a7','String') +('a.a8','String') +('a.b','Array(JSON)') +('a.r','Array(JSON(max_dynamic_types=16, max_dynamic_paths=2))') +('b.c.d_0','Int64') +('b.c.d_1','Int64') +('b.c.d_2','Int64') +('b.c.d_3','Int64') +('b.c.d_4','Int64') +('c.d.e','Array(Nullable(Int64))') +('b.c.d_0','Int64') +('b.c.d_1','Int64') +('b.c.d_2','Int64') +('b.c.d_3','Int64') +('b.c.d_4','Int64') +('c.d.e','Array(Nullable(Int64))') +{ + "json": [{"a":{"b":[]}}, {"a":{"b":[]}}, {"a":{"b":[]}}, {"a":{"b":[]}}, {"a":{"b":[]}}, {"a":{"b":[{"b":{"c":{"d_0":"5"}},"c":{"d":{"e":["0"]}}}]}}, {"a":{"b":[{"b":{"c":{"d_0":"6"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"6"}},"c":{"d":{"e":["0","1","2"]}}}]}}, {"a":{"b":[{"b":{"c":{"d_0":"7"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"7"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"7"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}}, {"a":{"b":[{"b":{"c":{"d_0":"8"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"8"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"8"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"8"}},"c":{"d":{"e":["0","1"]}}}]}}, {"a":{"b":[{"b":{"c":{"d_0":"9"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"9"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"9"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"9"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"9"}},"c":{"d":{"e":["0","1","2","3"]}}}]}}, {"a":{"b":[],"r":[{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}]}}, {"a":{"b":[],"r":[{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}]}}, {"a":{"b":[],"r":[{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}}, {"a":{"b":[],"r":[{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}]}}, {"a":{"b":[],"r":[{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}]}}, {"a":{"a1":"15","a2":"15","a3":"15","a4":"15","a5":"15","a6":"15","a7":"15","a8":"15","b":[],"r":[{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}]}}, {"a":{"a1":"16","a2":"16","a3":"16","a4":"16","a5":"16","a6":"16","a7":"16","a8":"16","b":[],"r":[{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}]}}, {"a":{"a1":"17","a2":"17","a3":"17","a4":"17","a5":"17","a6":"17","a7":"17","a8":"17","b":[],"r":[{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}}, {"a":{"a1":"18","a2":"18","a3":"18","a4":"18","a5":"18","a6":"18","a7":"18","a8":"18","b":[],"r":[{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}]}}, {"a":{"a1":"19","a2":"19","a3":"19","a4":"19","a5":"19","a6":"19","a7":"19","a8":"19","b":[],"r":[{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]}}], + "json.a.b": [[], [], [], [], [], [{"b":{"c":{"d_0":"5"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"6"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"6"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"7"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"7"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"7"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"8"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"8"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"8"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"8"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"9"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"9"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"9"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"9"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"9"}},"c":{"d":{"e":["0","1","2","3"]}}}], [], [], [], [], [], [], [], [], [], []], + "json.a.b.c": [[], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,null], [null,null,null,null,null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.c.d.e": [[], [], [], [], [], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_0": [[], [], [], [], [], ["5"], ["6",null], ["7",null,null], ["8",null,null,null], ["9",null,null,null,null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_1": [[], [], [], [], [], [null], [null,"6"], [null,"7",null], [null,"8",null,null], [null,"9",null,null,null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_2": [[], [], [], [], [], [null], [null,null], [null,null,"7"], [null,null,"8",null], [null,null,"9",null,null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_3": [[], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,"8"], [null,null,null,"9",null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_4": [[], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,null], [null,null,null,null,"9"], [], [], [], [], [], [], [], [], [], []], + "json.a.r": [null, null, null, null, null, null, null, null, null, null, [{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}], [{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]], + "json.a.r.:`Array(JSON)`": [[], [], [], [], [], [], [], [], [], [], [{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}], [{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]], + "json.a.r.:`Array(JSON)`.c.d.e": [[], [], [], [], [], [], [], [], [], [], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]]], + "json.a.r.:`Array(JSON)`.b.c.d_0": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_1": [[], [], [], [], [], [], [], [], [], [], [null], [null,"11"], [null,"12",null], [null,"13",null,null], [null,"14",null,null,null], [null], [null,"16"], [null,"17",null], [null,"18",null,null], [null,"19",null,null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_2": [[], [], [], [], [], [], [], [], [], [], [null], [null,null], [null,null,"12"], [null,null,"13",null], [null,null,"14",null,null], [null], [null,null], [null,null,"17"], [null,null,"18",null], [null,null,"19",null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_3": [[], [], [], [], [], [], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,"13"], [null,null,null,"14",null], [null], [null,null], [null,null,null], [null,null,null,"18"], [null,null,null,"19",null]], + "json.a.r.:`Array(JSON)`.b.c.d_4": [[], [], [], [], [], [], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,null], [null,null,null,null,"14"], [null], [null,null], [null,null,null], [null,null,null,null], [null,null,null,null,"19"]], + "json.^`a`": [{"b":[]}, {"b":[]}, {"b":[]}, {"b":[]}, {"b":[]}, {"b":[{"b":{"c":{"d_0":"5"}},"c":{"d":{"e":["0"]}}}]}, {"b":[{"b":{"c":{"d_0":"6"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"6"}},"c":{"d":{"e":["0","1","2"]}}}]}, {"b":[{"b":{"c":{"d_0":"7"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"7"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"7"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}, {"b":[{"b":{"c":{"d_0":"8"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"8"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"8"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"8"}},"c":{"d":{"e":["0","1"]}}}]}, {"b":[{"b":{"c":{"d_0":"9"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"9"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"9"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"9"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"9"}},"c":{"d":{"e":["0","1","2","3"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}]}, {"a1":"15","a2":"15","a3":"15","a4":"15","a5":"15","a6":"15","a7":"15","a8":"15","b":[],"r":[{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}]}, {"a1":"16","a2":"16","a3":"16","a4":"16","a5":"16","a6":"16","a7":"16","a8":"16","b":[],"r":[{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}]}, {"a1":"17","a2":"17","a3":"17","a4":"17","a5":"17","a6":"17","a7":"17","a8":"17","b":[],"r":[{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}, {"a1":"18","a2":"18","a3":"18","a4":"18","a5":"18","a6":"18","a7":"18","a8":"18","b":[],"r":[{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}]}, {"a1":"19","a2":"19","a3":"19","a4":"19","a5":"19","a6":"19","a7":"19","a8":"19","b":[],"r":[{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]}], + "json.a.b.^`b`.c": [[], [], [], [], [], [{"d_0":"5"}], [{"d_0":"6"},{"d_1":"6"}], [{"d_0":"7"},{"d_1":"7"},{"d_2":"7"}], [{"d_0":"8"},{"d_1":"8"},{"d_2":"8"},{"d_3":"8"}], [{"d_0":"9"},{"d_1":"9"},{"d_2":"9"},{"d_3":"9"},{"d_4":"9"}], [], [], [], [], [], [], [], [], [], []], + "json.a.r.:`Array(JSON)`.^`b`.c": [[], [], [], [], [], [], [], [], [], [], [{"d_0":"10"}], [{"d_0":"11"},{"d_1":"11"}], [{"d_0":"12"},{"d_1":"12"},{"d_2":"12"}], [{"d_0":"13"},{"d_1":"13"},{"d_2":"13"},{"d_3":"13"}], [{"d_0":"14"},{"d_1":"14"},{"d_2":"14"},{"d_3":"14"},{"d_4":"14"}], [{"d_0":"15"}], [{"d_0":"16"},{"d_1":"16"}], [{"d_0":"17"},{"d_1":"17"},{"d_2":"17"}], [{"d_0":"18"},{"d_1":"18"},{"d_2":"18"},{"d_3":"18"}], [{"d_0":"19"},{"d_1":"19"},{"d_2":"19"},{"d_3":"19"},{"d_4":"19"}]] +} +{ + "json.a.b": [[], [], [], [], [], [{"b":{"c":{"d_0":"5"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"6"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"6"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"7"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"7"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"7"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"8"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"8"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"8"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"8"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"9"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"9"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"9"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"9"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"9"}},"c":{"d":{"e":["0","1","2","3"]}}}], [], [], [], [], [], [], [], [], [], []], + "json.a.b.c": [[], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,null], [null,null,null,null,null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.c.d.e": [[], [], [], [], [], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_0": [[], [], [], [], [], ["5"], ["6",null], ["7",null,null], ["8",null,null,null], ["9",null,null,null,null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_1": [[], [], [], [], [], [null], [null,"6"], [null,"7",null], [null,"8",null,null], [null,"9",null,null,null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_2": [[], [], [], [], [], [null], [null,null], [null,null,"7"], [null,null,"8",null], [null,null,"9",null,null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_3": [[], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,"8"], [null,null,null,"9",null], [], [], [], [], [], [], [], [], [], []], + "json.a.b.b.c.d_4": [[], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,null], [null,null,null,null,"9"], [], [], [], [], [], [], [], [], [], []], + "json.a.r": [null, null, null, null, null, null, null, null, null, null, [{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}], [{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]], + "json.a.r.:`Array(JSON)`": [[], [], [], [], [], [], [], [], [], [], [{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}], [{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]], + "json.a.r.:`Array(JSON)`.c.d.e": [[], [], [], [], [], [], [], [], [], [], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]]], + "json.a.r.:`Array(JSON)`.b.c.d_0": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_1": [[], [], [], [], [], [], [], [], [], [], [null], [null,"11"], [null,"12",null], [null,"13",null,null], [null,"14",null,null,null], [null], [null,"16"], [null,"17",null], [null,"18",null,null], [null,"19",null,null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_2": [[], [], [], [], [], [], [], [], [], [], [null], [null,null], [null,null,"12"], [null,null,"13",null], [null,null,"14",null,null], [null], [null,null], [null,null,"17"], [null,null,"18",null], [null,null,"19",null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_3": [[], [], [], [], [], [], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,"13"], [null,null,null,"14",null], [null], [null,null], [null,null,null], [null,null,null,"18"], [null,null,null,"19",null]], + "json.a.r.:`Array(JSON)`.b.c.d_4": [[], [], [], [], [], [], [], [], [], [], [null], [null,null], [null,null,null], [null,null,null,null], [null,null,null,null,"14"], [null], [null,null], [null,null,null], [null,null,null,null], [null,null,null,null,"19"]], + "json.^`a`": [{"b":[]}, {"b":[]}, {"b":[]}, {"b":[]}, {"b":[]}, {"b":[{"b":{"c":{"d_0":"5"}},"c":{"d":{"e":["0"]}}}]}, {"b":[{"b":{"c":{"d_0":"6"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"6"}},"c":{"d":{"e":["0","1","2"]}}}]}, {"b":[{"b":{"c":{"d_0":"7"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"7"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"7"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}, {"b":[{"b":{"c":{"d_0":"8"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"8"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"8"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"8"}},"c":{"d":{"e":["0","1"]}}}]}, {"b":[{"b":{"c":{"d_0":"9"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"9"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"9"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"9"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"9"}},"c":{"d":{"e":["0","1","2","3"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}]}, {"b":[],"r":[{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}]}, {"a1":"15","a2":"15","a3":"15","a4":"15","a5":"15","a6":"15","a7":"15","a8":"15","b":[],"r":[{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}]}, {"a1":"16","a2":"16","a3":"16","a4":"16","a5":"16","a6":"16","a7":"16","a8":"16","b":[],"r":[{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}]}, {"a1":"17","a2":"17","a3":"17","a4":"17","a5":"17","a6":"17","a7":"17","a8":"17","b":[],"r":[{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}]}, {"a1":"18","a2":"18","a3":"18","a4":"18","a5":"18","a6":"18","a7":"18","a8":"18","b":[],"r":[{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}]}, {"a1":"19","a2":"19","a3":"19","a4":"19","a5":"19","a6":"19","a7":"19","a8":"19","b":[],"r":[{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]}], + "json.a.b.^`b`.c": [[], [], [], [], [], [{"d_0":"5"}], [{"d_0":"6"},{"d_1":"6"}], [{"d_0":"7"},{"d_1":"7"},{"d_2":"7"}], [{"d_0":"8"},{"d_1":"8"},{"d_2":"8"},{"d_3":"8"}], [{"d_0":"9"},{"d_1":"9"},{"d_2":"9"},{"d_3":"9"},{"d_4":"9"}], [], [], [], [], [], [], [], [], [], []], + "json.a.r.:`Array(JSON)`.^`b`.c": [[], [], [], [], [], [], [], [], [], [], [{"d_0":"10"}], [{"d_0":"11"},{"d_1":"11"}], [{"d_0":"12"},{"d_1":"12"},{"d_2":"12"}], [{"d_0":"13"},{"d_1":"13"},{"d_2":"13"},{"d_3":"13"}], [{"d_0":"14"},{"d_1":"14"},{"d_2":"14"},{"d_3":"14"},{"d_4":"14"}], [{"d_0":"15"}], [{"d_0":"16"},{"d_1":"16"}], [{"d_0":"17"},{"d_1":"17"},{"d_2":"17"}], [{"d_0":"18"},{"d_1":"18"},{"d_2":"18"},{"d_3":"18"}], [{"d_0":"19"},{"d_1":"19"},{"d_2":"19"},{"d_3":"19"},{"d_4":"19"}]] +} +{ + "json.a.r.:`Array(JSON)`.c.d.e": [[], [], [], [], [], [], [], [], [], [], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]]], + "json.a.r.:`Array(JSON)`.b.c.d_0": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_1": [[], [], [], [], [], [], [], [], [], [], [null], [null,"11"], [null,"12",null], [null,"13",null,null], [null,"14",null,null,null], [null], [null,"16"], [null,"17",null], [null,"18",null,null], [null,"19",null,null,null]] +} +{ + "json.a.r.:`Array(JSON)`.c.d.e.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]]], + "json.a.r.:`Array(JSON)`.b.c.d_0.:`Int64`": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_1.:`Int64`": [[], [], [], [], [], [], [], [], [], [], [null], [null,"11"], [null,"12",null], [null,"13",null,null], [null,"14",null,null,null], [null], [null,"16"], [null,"17",null], [null,"18",null,null], [null,"19",null,null,null]] +} +{ + "json.a.r": [null, null, null, null, null, null, null, null, null, null, [{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}], [{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]], + "json.a.r.:`Array(JSON)`.c.d.e": [[], [], [], [], [], [], [], [], [], [], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]]], + "json.a.r.:`Array(JSON)`.b.c.d_0": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_1": [[], [], [], [], [], [], [], [], [], [], [null], [null,"11"], [null,"12",null], [null,"13",null,null], [null,"14",null,null,null], [null], [null,"16"], [null,"17",null], [null,"18",null,null], [null,"19",null,null,null]] +} +{ + "json.a.r": [null, null, null, null, null, null, null, null, null, null, [{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}], [{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]], + "json.a.r.:`Array(JSON)`.c.d.e.:`Array(Nullable(Int64))`": [[], [], [], [], [], [], [], [], [], [], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]], [["0"]], [["0","1"],["0","1","2"]], [["0","1","2"],["0","1","2","3"],["0","1","2","3","4"]], [["0","1","2","3"],["0","1","2","3","4"],["0"],["0","1"]], [["0","1","2","3","4"],["0"],["0","1"],["0","1","2"],["0","1","2","3"]]], + "json.a.r.:`Array(JSON)`.b.c.d_0.:`Int64`": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]], + "json.a.r.:`Array(JSON)`.b.c.d_1.:`Int64`": [[], [], [], [], [], [], [], [], [], [], [null], [null,"11"], [null,"12",null], [null,"13",null,null], [null,"14",null,null,null], [null], [null,"16"], [null,"17",null], [null,"18",null,null], [null,"19",null,null,null]] +} +{ + "json.a.r.:`Array(JSON)`.^`b`": [[], [], [], [], [], [], [], [], [], [], [{"c":{"d_0":"10"}}], [{"c":{"d_0":"11"}},{"c":{"d_1":"11"}}], [{"c":{"d_0":"12"}},{"c":{"d_1":"12"}},{"c":{"d_2":"12"}}], [{"c":{"d_0":"13"}},{"c":{"d_1":"13"}},{"c":{"d_2":"13"}},{"c":{"d_3":"13"}}], [{"c":{"d_0":"14"}},{"c":{"d_1":"14"}},{"c":{"d_2":"14"}},{"c":{"d_3":"14"}},{"c":{"d_4":"14"}}], [{"c":{"d_0":"15"}}], [{"c":{"d_0":"16"}},{"c":{"d_1":"16"}}], [{"c":{"d_0":"17"}},{"c":{"d_1":"17"}},{"c":{"d_2":"17"}}], [{"c":{"d_0":"18"}},{"c":{"d_1":"18"}},{"c":{"d_2":"18"}},{"c":{"d_3":"18"}}], [{"c":{"d_0":"19"}},{"c":{"d_1":"19"}},{"c":{"d_2":"19"}},{"c":{"d_3":"19"}},{"c":{"d_4":"19"}}]], + "json.a.r.:`Array(JSON)`.^`b`.c": [[], [], [], [], [], [], [], [], [], [], [{"d_0":"10"}], [{"d_0":"11"},{"d_1":"11"}], [{"d_0":"12"},{"d_1":"12"},{"d_2":"12"}], [{"d_0":"13"},{"d_1":"13"},{"d_2":"13"},{"d_3":"13"}], [{"d_0":"14"},{"d_1":"14"},{"d_2":"14"},{"d_3":"14"},{"d_4":"14"}], [{"d_0":"15"}], [{"d_0":"16"},{"d_1":"16"}], [{"d_0":"17"},{"d_1":"17"},{"d_2":"17"}], [{"d_0":"18"},{"d_1":"18"},{"d_2":"18"},{"d_3":"18"}], [{"d_0":"19"},{"d_1":"19"},{"d_2":"19"},{"d_3":"19"},{"d_4":"19"}]], + "json.a.r.:`Array(JSON)`.b.c.d_0": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]] +} +{ + "json.a.r.:`Array(JSON)`.^`b`": [[], [], [], [], [], [], [], [], [], [], [{"c":{"d_0":"10"}}], [{"c":{"d_0":"11"}},{"c":{"d_1":"11"}}], [{"c":{"d_0":"12"}},{"c":{"d_1":"12"}},{"c":{"d_2":"12"}}], [{"c":{"d_0":"13"}},{"c":{"d_1":"13"}},{"c":{"d_2":"13"}},{"c":{"d_3":"13"}}], [{"c":{"d_0":"14"}},{"c":{"d_1":"14"}},{"c":{"d_2":"14"}},{"c":{"d_3":"14"}},{"c":{"d_4":"14"}}], [{"c":{"d_0":"15"}}], [{"c":{"d_0":"16"}},{"c":{"d_1":"16"}}], [{"c":{"d_0":"17"}},{"c":{"d_1":"17"}},{"c":{"d_2":"17"}}], [{"c":{"d_0":"18"}},{"c":{"d_1":"18"}},{"c":{"d_2":"18"}},{"c":{"d_3":"18"}}], [{"c":{"d_0":"19"}},{"c":{"d_1":"19"}},{"c":{"d_2":"19"}},{"c":{"d_3":"19"}},{"c":{"d_4":"19"}}]], + "json.a.r.:`Array(JSON)`.^`b`.c": [[], [], [], [], [], [], [], [], [], [], [{"d_0":"10"}], [{"d_0":"11"},{"d_1":"11"}], [{"d_0":"12"},{"d_1":"12"},{"d_2":"12"}], [{"d_0":"13"},{"d_1":"13"},{"d_2":"13"},{"d_3":"13"}], [{"d_0":"14"},{"d_1":"14"},{"d_2":"14"},{"d_3":"14"},{"d_4":"14"}], [{"d_0":"15"}], [{"d_0":"16"},{"d_1":"16"}], [{"d_0":"17"},{"d_1":"17"},{"d_2":"17"}], [{"d_0":"18"},{"d_1":"18"},{"d_2":"18"},{"d_3":"18"}], [{"d_0":"19"},{"d_1":"19"},{"d_2":"19"},{"d_3":"19"},{"d_4":"19"}]], + "json.a.r.:`Array(JSON)`.b.c.d_0.:`Int64`": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]] +} +{ + "json.a.r": [null, null, null, null, null, null, null, null, null, null, [{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}], [{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]], + "json.a.r.:`Array(JSON)`.^`b`": [[], [], [], [], [], [], [], [], [], [], [{"c":{"d_0":"10"}}], [{"c":{"d_0":"11"}},{"c":{"d_1":"11"}}], [{"c":{"d_0":"12"}},{"c":{"d_1":"12"}},{"c":{"d_2":"12"}}], [{"c":{"d_0":"13"}},{"c":{"d_1":"13"}},{"c":{"d_2":"13"}},{"c":{"d_3":"13"}}], [{"c":{"d_0":"14"}},{"c":{"d_1":"14"}},{"c":{"d_2":"14"}},{"c":{"d_3":"14"}},{"c":{"d_4":"14"}}], [{"c":{"d_0":"15"}}], [{"c":{"d_0":"16"}},{"c":{"d_1":"16"}}], [{"c":{"d_0":"17"}},{"c":{"d_1":"17"}},{"c":{"d_2":"17"}}], [{"c":{"d_0":"18"}},{"c":{"d_1":"18"}},{"c":{"d_2":"18"}},{"c":{"d_3":"18"}}], [{"c":{"d_0":"19"}},{"c":{"d_1":"19"}},{"c":{"d_2":"19"}},{"c":{"d_3":"19"}},{"c":{"d_4":"19"}}]], + "json.a.r.:`Array(JSON)`.^`b`.c": [[], [], [], [], [], [], [], [], [], [], [{"d_0":"10"}], [{"d_0":"11"},{"d_1":"11"}], [{"d_0":"12"},{"d_1":"12"},{"d_2":"12"}], [{"d_0":"13"},{"d_1":"13"},{"d_2":"13"},{"d_3":"13"}], [{"d_0":"14"},{"d_1":"14"},{"d_2":"14"},{"d_3":"14"},{"d_4":"14"}], [{"d_0":"15"}], [{"d_0":"16"},{"d_1":"16"}], [{"d_0":"17"},{"d_1":"17"},{"d_2":"17"}], [{"d_0":"18"},{"d_1":"18"},{"d_2":"18"},{"d_3":"18"}], [{"d_0":"19"},{"d_1":"19"},{"d_2":"19"},{"d_3":"19"},{"d_4":"19"}]], + "json.a.r.:`Array(JSON)`.b.c.d_0": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]] +} +{ + "json.a.r": [null, null, null, null, null, null, null, null, null, null, [{"b":{"c":{"d_0":"10"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"11"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"11"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"12"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"12"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"12"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"13"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"13"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"13"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"13"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"14"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"14"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"14"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"14"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"14"}},"c":{"d":{"e":["0","1","2","3"]}}}], [{"b":{"c":{"d_0":"15"}},"c":{"d":{"e":["0"]}}}], [{"b":{"c":{"d_0":"16"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_1":"16"}},"c":{"d":{"e":["0","1","2"]}}}], [{"b":{"c":{"d_0":"17"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_1":"17"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_2":"17"}},"c":{"d":{"e":["0","1","2","3","4"]}}}], [{"b":{"c":{"d_0":"18"}},"c":{"d":{"e":["0","1","2","3"]}}},{"b":{"c":{"d_1":"18"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_2":"18"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_3":"18"}},"c":{"d":{"e":["0","1"]}}}], [{"b":{"c":{"d_0":"19"}},"c":{"d":{"e":["0","1","2","3","4"]}}},{"b":{"c":{"d_1":"19"}},"c":{"d":{"e":["0"]}}},{"b":{"c":{"d_2":"19"}},"c":{"d":{"e":["0","1"]}}},{"b":{"c":{"d_3":"19"}},"c":{"d":{"e":["0","1","2"]}}},{"b":{"c":{"d_4":"19"}},"c":{"d":{"e":["0","1","2","3"]}}}]], + "json.a.r.:`Array(JSON)`.^`b`": [[], [], [], [], [], [], [], [], [], [], [{"c":{"d_0":"10"}}], [{"c":{"d_0":"11"}},{"c":{"d_1":"11"}}], [{"c":{"d_0":"12"}},{"c":{"d_1":"12"}},{"c":{"d_2":"12"}}], [{"c":{"d_0":"13"}},{"c":{"d_1":"13"}},{"c":{"d_2":"13"}},{"c":{"d_3":"13"}}], [{"c":{"d_0":"14"}},{"c":{"d_1":"14"}},{"c":{"d_2":"14"}},{"c":{"d_3":"14"}},{"c":{"d_4":"14"}}], [{"c":{"d_0":"15"}}], [{"c":{"d_0":"16"}},{"c":{"d_1":"16"}}], [{"c":{"d_0":"17"}},{"c":{"d_1":"17"}},{"c":{"d_2":"17"}}], [{"c":{"d_0":"18"}},{"c":{"d_1":"18"}},{"c":{"d_2":"18"}},{"c":{"d_3":"18"}}], [{"c":{"d_0":"19"}},{"c":{"d_1":"19"}},{"c":{"d_2":"19"}},{"c":{"d_3":"19"}},{"c":{"d_4":"19"}}]], + "json.a.r.:`Array(JSON)`.^`b`.c": [[], [], [], [], [], [], [], [], [], [], [{"d_0":"10"}], [{"d_0":"11"},{"d_1":"11"}], [{"d_0":"12"},{"d_1":"12"},{"d_2":"12"}], [{"d_0":"13"},{"d_1":"13"},{"d_2":"13"},{"d_3":"13"}], [{"d_0":"14"},{"d_1":"14"},{"d_2":"14"},{"d_3":"14"},{"d_4":"14"}], [{"d_0":"15"}], [{"d_0":"16"},{"d_1":"16"}], [{"d_0":"17"},{"d_1":"17"},{"d_2":"17"}], [{"d_0":"18"},{"d_1":"18"},{"d_2":"18"},{"d_3":"18"}], [{"d_0":"19"},{"d_1":"19"},{"d_2":"19"},{"d_3":"19"},{"d_4":"19"}]], + "json.a.r.:`Array(JSON)`.b.c.d_0.:`Int64`": [[], [], [], [], [], [], [], [], [], [], ["10"], ["11",null], ["12",null,null], ["13",null,null,null], ["14",null,null,null,null], ["15"], ["16",null], ["17",null,null], ["18",null,null,null], ["19",null,null,null,null]] +} diff --git a/tests/queries/0_stateless/03208_array_of_json_read_subcolumns_1.sql.j2 b/tests/queries/0_stateless/03208_array_of_json_read_subcolumns_1.sql.j2 new file mode 100644 index 00000000000..1353980cd35 --- /dev/null +++ b/tests/queries/0_stateless/03208_array_of_json_read_subcolumns_1.sql.j2 @@ -0,0 +1,41 @@ +-- Tags: no-fasttest, long + +set allow_experimental_json_type = 1; +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; + +drop table if exists test; + +{% for create_command in ['create table test (id UInt64, json JSON(max_dynamic_paths=8, a.b Array(JSON))) engine=Memory;', + 'create table test (id UInt64, json JSON(max_dynamic_paths=8, a.b Array(JSON))) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000; system stop merges test;', + 'create table test (id UInt64, json JSON(max_dynamic_paths=8, a.b Array(JSON))) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000; system start merges test;', + 'create table test (id UInt64, json JSON(max_dynamic_paths=8, a.b Array(JSON))) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1; system stop merges test;', + 'create table test (id UInt64, json JSON(max_dynamic_paths=8, a.b Array(JSON))) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1; system start merges test;'] -%} + +{{ create_command }} + +insert into test select number, '{}' from numbers(5); +insert into test select number, toJSONString(map('a.b', arrayMap(x -> map('b.c.d_' || toString(x), number::UInt32, 'c.d.e', range((number + x) % 5 + 1)), range(number % 5 + 1)))) from numbers(5, 5); +insert into test select number, toJSONString(map('a.r', arrayMap(x -> map('b.c.d_' || toString(x), number::UInt32, 'c.d.e', range((number + x) % 5 + 1)), range(number % 5 + 1)))) from numbers(10, 5); +insert into test select number, toJSONString(map('a.a1', number, 'a.a2', number, 'a.a3', number, 'a.a4', number, 'a.a5', number, 'a.a6', number, 'a.a7', number, 'a.a8', number, 'a.r', arrayMap(x -> map('b.c.d_' || toString(x), number::UInt32, 'c.d.e', range((number + x) % 5 + 1)), range(number % 5 + 1)))) from numbers(15, 5); + +select distinct arrayJoin(JSONAllPathsWithTypes(json)) as paths_with_types from test order by paths_with_types; +select distinct arrayJoin(JSONAllPathsWithTypes(arrayJoin(json.a.b))) as paths_with_types from test order by paths_with_types; +select distinct arrayJoin(JSONAllPathsWithTypes(arrayJoin(json.a.r[]))) as paths_with_types from test order by paths_with_types; + +select json, json.a.b, json.a.b.c, json.a.b.c.d.e, json.a.b.b.c.d_0, json.a.b.b.c.d_1, json.a.b.b.c.d_2, json.a.b.b.c.d_3, json.a.b.b.c.d_4, json.a.r, json.a.r[], json.a.r[].c.d.e, json.a.r[].b.c.d_0, json.a.r[].b.c.d_1, json.a.r[].b.c.d_2, json.a.r[].b.c.d_3, json.a.r[].b.c.d_4, json.^a, json.a.b.^b.c, json.a.r[].^b.c from test order by id format JSONColumns; +select json.a.b, json.a.b.c, json.a.b.c.d.e, json.a.b.b.c.d_0, json.a.b.b.c.d_1, json.a.b.b.c.d_2, json.a.b.b.c.d_3, json.a.b.b.c.d_4, json.a.r, json.a.r[], json.a.r[].c.d.e, json.a.r[].b.c.d_0, json.a.r[].b.c.d_1, json.a.r[].b.c.d_2, json.a.r[].b.c.d_3, json.a.r[].b.c.d_4, json.^a, json.a.b.^b.c, json.a.r[].^b.c from test order by id format JSONColumns; + +select json.a.r[].c.d.e, json.a.r[].b.c.d_0, json.a.r[].b.c.d_1 from test order by id format JSONColumns; +select json.a.r[].c.d.e.:`Array(Nullable(Int64))`, json.a.r[].b.c.d_0.:Int64, json.a.r[].b.c.d_1.:Int64 from test order by id format JSONColumns; +select json.a.r, json.a.r[].c.d.e, json.a.r[].b.c.d_0, json.a.r[].b.c.d_1 from test order by id format JSONColumns; +select json.a.r, json.a.r[].c.d.e.:`Array(Nullable(Int64))`, json.a.r[].b.c.d_0.:Int64, json.a.r[].b.c.d_1.:Int64 from test order by id format JSONColumns; + +select json.a.r[].^b, json.a.r[].^b.c, json.a.r[].b.c.d_0 from test order by id format JSONColumns; +select json.a.r[].^b, json.a.r[].^b.c, json.a.r[].b.c.d_0.:Int64 from test order by id format JSONColumns; +select json.a.r, json.a.r[].^b, json.a.r[].^b.c, json.a.r[].b.c.d_0 from test order by id format JSONColumns; +select json.a.r, json.a.r[].^b, json.a.r[].^b.c, json.a.r[].b.c.d_0.:Int64 from test order by id format JSONColumns; + +drop table test; + +{% endfor -%} diff --git a/tests/queries/0_stateless/03208_array_of_json_read_subcolumns_2_compact_merge_tree.reference.j2 b/tests/queries/0_stateless/03208_array_of_json_read_subcolumns_2_compact_merge_tree.reference.j2 new file mode 100644 index 00000000000..2fd3437e3d2 --- /dev/null +++ b/tests/queries/0_stateless/03208_array_of_json_read_subcolumns_2_compact_merge_tree.reference.j2 @@ -0,0 +1,60 @@ +('a.a1','String') +('a.a2','String') +('a.a3','String') +('a.a4','String') +('a.a5','String') +('a.a6','String') +('a.a7','String') +('a.a8','String') +('a.b','Array(JSON)') +('a.r','Array(JSON(max_dynamic_types=16, max_dynamic_paths=2))') +('b.c.d_0','Int64') +('b.c.d_1','Int64') +('b.c.d_2','Int64') +('b.c.d_3','Int64') +('b.c.d_4','Int64') +('c.d.e','Array(Nullable(Int64))') +('b.c.d_0','Int64') +('b.c.d_1','Int64') +('b.c.d_2','Int64') +('b.c.d_3','Int64') +('b.c.d_4','Int64') +('c.d.e','Array(Nullable(Int64))') +20000 +20000 +0 +0 +20000 +20000 +0 +0 +('a.a1','String') +('a.a2','String') +('a.a3','String') +('a.a4','String') +('a.a5','String') +('a.a6','String') +('a.a7','String') +('a.a8','String') +('a.b','Array(JSON)') +('a.r','Array(JSON(max_dynamic_types=16, max_dynamic_paths=2))') +('b.c.d_0','Int64') +('b.c.d_1','Int64') +('b.c.d_2','Int64') +('b.c.d_3','Int64') +('b.c.d_4','Int64') +('c.d.e','Array(Nullable(Int64))') +('b.c.d_0','Int64') +('b.c.d_1','Int64') +('b.c.d_2','Int64') +('b.c.d_3','Int64') +('b.c.d_4','Int64') +('c.d.e','Array(Nullable(Int64))') +20000 +20000 +0 +0 +20000 +20000 +0 +0 diff --git a/tests/queries/0_stateless/03208_array_of_json_read_subcolumns_2_compact_merge_tree.sql.j2 b/tests/queries/0_stateless/03208_array_of_json_read_subcolumns_2_compact_merge_tree.sql.j2 new file mode 100644 index 00000000000..e3930165602 --- /dev/null +++ b/tests/queries/0_stateless/03208_array_of_json_read_subcolumns_2_compact_merge_tree.sql.j2 @@ -0,0 +1,57 @@ +-- Tags: no-fasttest, long, no-debug, no-tsan, no-asan, no-msan, no-ubsan + +set allow_experimental_json_type = 1; +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; + +create table test (id UInt64, json JSON(max_dynamic_paths=8, a.b Array(JSON))) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000; + +insert into test select number, '{}' from numbers(10000); +insert into test select number, toJSONString(map('a.b', arrayMap(x -> map('b.c.d_' || toString(x), number::UInt32, 'c.d.e', range((number + x) % 5 + 1)), range(number % 5 + 1)))) from numbers(10000, 10000); +insert into test select number, toJSONString(map('a.r', arrayMap(x -> map('b.c.d_' || toString(x), number::UInt32, 'c.d.e', range((number + x) % 5 + 1)), range(number % 5 + 1)))) from numbers(20000, 10000); +insert into test select number, toJSONString(map('a.a1', number, 'a.a2', number, 'a.a3', number, 'a.a4', number, 'a.a5', number, 'a.a6', number, 'a.a7', number, 'a.a8', number, 'a.r', arrayMap(x -> map('b.c.d_' || toString(x), number::UInt32, 'c.d.e', range((number + x) % 5 + 1)), range(number % 5 + 1)))) from numbers(30000, 10000); + +{% for merge_command in ['system stop merges test', 'system start merges test'] -%} + +{{ merge_command }}; + +select distinct arrayJoin(JSONAllPathsWithTypes(json)) as paths_with_types from test order by paths_with_types; +select distinct arrayJoin(JSONAllPathsWithTypes(arrayJoin(json.a.b))) as paths_with_types from test order by paths_with_types; +select distinct arrayJoin(JSONAllPathsWithTypes(arrayJoin(json.a.r[]))) as paths_with_types from test order by paths_with_types; + +select json, json.a.b, json.a.b.c, json.a.b.c.d.e, json.a.b.b.c.d_0, json.a.b.b.c.d_1, json.a.b.b.c.d_2, json.a.b.b.c.d_3, json.a.b.b.c.d_4, json.a.r, json.a.r[], json.a.r[].c.d.e, json.a.r[].b.c.d_0, json.a.r[].b.c.d_1, json.a.r[].b.c.d_2, json.a.r[].b.c.d_3, json.a.r[].b.c.d_4, json.^a, json.a.b.^b.c, json.a.r[].^b.c from test format Null; +select json, json.a.b, json.a.b.c, json.a.b.c.d.e, json.a.b.b.c.d_0, json.a.b.b.c.d_1, json.a.b.b.c.d_2, json.a.b.b.c.d_3, json.a.b.b.c.d_4, json.a.r, json.a.r[], json.a.r[].c.d.e, json.a.r[].b.c.d_0, json.a.r[].b.c.d_1, json.a.r[].b.c.d_2, json.a.r[].b.c.d_3, json.a.r[].b.c.d_4, json.^a, json.a.b.^b.c, json.a.r[].^b.c from test order by id format Null; +select json.a.b, json.a.b.c, json.a.b.c.d.e, json.a.b.b.c.d_0, json.a.b.b.c.d_1, json.a.b.b.c.d_2, json.a.b.b.c.d_3, json.a.b.b.c.d_4, json.a.r, json.a.r[], json.a.r[].c.d.e, json.a.r[].b.c.d_0, json.a.r[].b.c.d_1, json.a.r[].b.c.d_2, json.a.r[].b.c.d_3, json.a.r[].b.c.d_4, json.^a, json.a.b.^b.c, json.a.r[].^b.c from test format Null; +select json.a.b, json.a.b.c, json.a.b.c.d.e, json.a.b.b.c.d_0, json.a.b.b.c.d_1, json.a.b.b.c.d_2, json.a.b.b.c.d_3, json.a.b.b.c.d_4, json.a.r, json.a.r[], json.a.r[].c.d.e, json.a.r[].b.c.d_0, json.a.r[].b.c.d_1, json.a.r[].b.c.d_2, json.a.r[].b.c.d_3, json.a.r[].b.c.d_4, json.^a, json.a.b.^b.c, json.a.r[].^b.c from test order by id format Null; + +select count() from test where empty(json.a.r[].c.d.e) and empty(json.a.r[].b.c.d_0) and empty(json.a.r[].b.c.d_1); +select count() from test where empty(json.a.r[].c.d.e.:`Array(Nullable(Int64))`) and empty(json.a.r[].b.c.d_0.:Int64) and empty(json.a.r[].b.c.d_1.:Int64); +select count() from test where arrayJoin(json.a.r[].c.d.e) is null and arrayJoin(json.a.r[].b.c.d_0) is null and arrayJoin(json.a.r[].b.c.d_1) is null; +select count() from test where arrayJoin(json.a.r[].c.d.e.:`Array(Nullable(Int64))`) is null and arrayJoin(json.a.r[].b.c.d_0.:Int64) is null and arrayJoin(json.a.r[].b.c.d_1.:Int64) is null; + +select json.a.r[].c.d.e, json.a.r[].b.c.d_0, json.a.r[].b.c.d_1 from test format Null; +select json.a.r[].c.d.e, json.a.r[].b.c.d_0, json.a.r[].b.c.d_1 from test order by id format Null; +select json.a.r[].c.d.e.:`Array(Nullable(Int64))`, json.a.r[].b.c.d_0.:Int64, json.a.r[].b.c.d_1.:Int64 from test format Null; +select json.a.r[].c.d.e.:`Array(Nullable(Int64))`, json.a.r[].b.c.d_0.:Int64, json.a.r[].b.c.d_1.:Int64 from test order by id format Null; +select json.a.r, json.a.r[].c.d.e, json.a.r[].b.c.d_0, json.a.r[].b.c.d_1 from test format Null; +select json.a.r, json.a.r[].c.d.e, json.a.r[].b.c.d_0, json.a.r[].b.c.d_1 from test order by id format Null; +select json.a.r, json.a.r[].c.d.e.:`Array(Nullable(Int64))`, json.a.r[].b.c.d_0.:Int64, json.a.r[].b.c.d_1.:Int64 from test format Null; +select json.a.r, json.a.r[].c.d.e.:`Array(Nullable(Int64))`, json.a.r[].b.c.d_0.:Int64, json.a.r[].b.c.d_1.:Int64 from test order by id format Null; + +select count() from test where empty(json.a.r[].^b) and empty(json.a.r[].^b.c) and empty(json.a.r[].b.c.d_0); +select count() from test where empty(json.a.r[].^b) and empty(json.a.r[].^b.c) and empty(json.a.r[].b.c.d_0.:Int64); +select count() from test where empty(arrayJoin(json.a.r[].^b)) and empty(arrayJoin(json.a.r[].^b.c)) and arrayJoin(json.a.r[].b.c.d_0) is null; +select count() from test where empty(arrayJoin(json.a.r[].^b)) and empty(arrayJoin(json.a.r[].^b.c)) and arrayJoin(json.a.r[].b.c.d_0.:Int64) is null; + +select json.a.r[].^b, json.a.r[].^b.c, json.a.r[].b.c.d_0 from test format Null; +select json.a.r[].^b, json.a.r[].^b.c, json.a.r[].b.c.d_0 from test order by id format Null; +select json.a.r[].^b, json.a.r[].^b.c, json.a.r[].b.c.d_0.:Int64 from test format Null; +select json.a.r[].^b, json.a.r[].^b.c, json.a.r[].b.c.d_0.:Int64 from test order by id format Null; +select json.a.r, json.a.r[].^b, json.a.r[].^b.c, json.a.r[].b.c.d_0 from test format Null; +select json.a.r, json.a.r[].^b, json.a.r[].^b.c, json.a.r[].b.c.d_0 from test order by id format Null; +select json.a.r, json.a.r[].^b, json.a.r[].^b.c, json.a.r[].b.c.d_0.:Int64 from test format Null; +select json.a.r, json.a.r[].^b, json.a.r[].^b.c, json.a.r[].b.c.d_0.:Int64 from test order by id format Null; + +{% endfor -%} + +drop table test; diff --git a/tests/queries/0_stateless/03208_array_of_json_read_subcolumns_2_memory.reference b/tests/queries/0_stateless/03208_array_of_json_read_subcolumns_2_memory.reference new file mode 100644 index 00000000000..34557cf60bb --- /dev/null +++ b/tests/queries/0_stateless/03208_array_of_json_read_subcolumns_2_memory.reference @@ -0,0 +1,30 @@ +('a.a1','String') +('a.a2','String') +('a.a3','String') +('a.a4','String') +('a.a5','String') +('a.a6','String') +('a.a7','String') +('a.a8','String') +('a.b','Array(JSON)') +('a.r','Array(JSON(max_dynamic_types=16, max_dynamic_paths=2))') +('b.c.d_0','Int64') +('b.c.d_1','Int64') +('b.c.d_2','Int64') +('b.c.d_3','Int64') +('b.c.d_4','Int64') +('c.d.e','Array(Nullable(Int64))') +('b.c.d_0','Int64') +('b.c.d_1','Int64') +('b.c.d_2','Int64') +('b.c.d_3','Int64') +('b.c.d_4','Int64') +('c.d.e','Array(Nullable(Int64))') +20000 +20000 +0 +0 +20000 +20000 +0 +0 diff --git a/tests/queries/0_stateless/03208_array_of_json_read_subcolumns_2_memory.sql b/tests/queries/0_stateless/03208_array_of_json_read_subcolumns_2_memory.sql new file mode 100644 index 00000000000..9274b9b9cf7 --- /dev/null +++ b/tests/queries/0_stateless/03208_array_of_json_read_subcolumns_2_memory.sql @@ -0,0 +1,52 @@ +-- Tags: no-fasttest, long, no-debug, no-tsan, no-asan, no-msan, no-ubsan + +set allow_experimental_json_type = 1; +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; + +drop table if exists test; +create table test (id UInt64, json JSON(max_dynamic_paths=8, a.b Array(JSON))) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000; + +insert into test select number, '{}' from numbers(10000); +insert into test select number, toJSONString(map('a.b', arrayMap(x -> map('b.c.d_' || toString(x), number::UInt32, 'c.d.e', range((number + x) % 5 + 1)), range(number % 5 + 1)))) from numbers(10000, 10000); +insert into test select number, toJSONString(map('a.r', arrayMap(x -> map('b.c.d_' || toString(x), number::UInt32, 'c.d.e', range((number + x) % 5 + 1)), range(number % 5 + 1)))) from numbers(20000, 10000); +insert into test select number, toJSONString(map('a.a1', number, 'a.a2', number, 'a.a3', number, 'a.a4', number, 'a.a5', number, 'a.a6', number, 'a.a7', number, 'a.a8', number, 'a.r', arrayMap(x -> map('b.c.d_' || toString(x), number::UInt32, 'c.d.e', range((number + x) % 5 + 1)), range(number % 5 + 1)))) from numbers(30000, 10000); + +select distinct arrayJoin(JSONAllPathsWithTypes(json)) as paths_with_types from test order by paths_with_types; +select distinct arrayJoin(JSONAllPathsWithTypes(arrayJoin(json.a.b))) as paths_with_types from test order by paths_with_types; +select distinct arrayJoin(JSONAllPathsWithTypes(arrayJoin(json.a.r[]))) as paths_with_types from test order by paths_with_types; + +select json, json.a.b, json.a.b.c, json.a.b.c.d.e, json.a.b.b.c.d_0, json.a.b.b.c.d_1, json.a.b.b.c.d_2, json.a.b.b.c.d_3, json.a.b.b.c.d_4, json.a.r, json.a.r[], json.a.r[].c.d.e, json.a.r[].b.c.d_0, json.a.r[].b.c.d_1, json.a.r[].b.c.d_2, json.a.r[].b.c.d_3, json.a.r[].b.c.d_4, json.^a, json.a.b.^b.c, json.a.r[].^b.c from test format Null; +select json, json.a.b, json.a.b.c, json.a.b.c.d.e, json.a.b.b.c.d_0, json.a.b.b.c.d_1, json.a.b.b.c.d_2, json.a.b.b.c.d_3, json.a.b.b.c.d_4, json.a.r, json.a.r[], json.a.r[].c.d.e, json.a.r[].b.c.d_0, json.a.r[].b.c.d_1, json.a.r[].b.c.d_2, json.a.r[].b.c.d_3, json.a.r[].b.c.d_4, json.^a, json.a.b.^b.c, json.a.r[].^b.c from test order by id format Null; +select json.a.b, json.a.b.c, json.a.b.c.d.e, json.a.b.b.c.d_0, json.a.b.b.c.d_1, json.a.b.b.c.d_2, json.a.b.b.c.d_3, json.a.b.b.c.d_4, json.a.r, json.a.r[], json.a.r[].c.d.e, json.a.r[].b.c.d_0, json.a.r[].b.c.d_1, json.a.r[].b.c.d_2, json.a.r[].b.c.d_3, json.a.r[].b.c.d_4, json.^a, json.a.b.^b.c, json.a.r[].^b.c from test format Null; +select json.a.b, json.a.b.c, json.a.b.c.d.e, json.a.b.b.c.d_0, json.a.b.b.c.d_1, json.a.b.b.c.d_2, json.a.b.b.c.d_3, json.a.b.b.c.d_4, json.a.r, json.a.r[], json.a.r[].c.d.e, json.a.r[].b.c.d_0, json.a.r[].b.c.d_1, json.a.r[].b.c.d_2, json.a.r[].b.c.d_3, json.a.r[].b.c.d_4, json.^a, json.a.b.^b.c, json.a.r[].^b.c from test order by id format Null; + +select count() from test where empty(json.a.r[].c.d.e) and empty(json.a.r[].b.c.d_0) and empty(json.a.r[].b.c.d_1); +select count() from test where empty(json.a.r[].c.d.e.:`Array(Nullable(Int64))`) and empty(json.a.r[].b.c.d_0.:Int64) and empty(json.a.r[].b.c.d_1.:Int64); +select count() from test where arrayJoin(json.a.r[].c.d.e) is null and arrayJoin(json.a.r[].b.c.d_0) is null and arrayJoin(json.a.r[].b.c.d_1) is null; +select count() from test where arrayJoin(json.a.r[].c.d.e.:`Array(Nullable(Int64))`) is null and arrayJoin(json.a.r[].b.c.d_0.:Int64) is null and arrayJoin(json.a.r[].b.c.d_1.:Int64) is null; + +select json.a.r[].c.d.e, json.a.r[].b.c.d_0, json.a.r[].b.c.d_1 from test format Null; +select json.a.r[].c.d.e, json.a.r[].b.c.d_0, json.a.r[].b.c.d_1 from test order by id format Null; +select json.a.r[].c.d.e.:`Array(Nullable(Int64))`, json.a.r[].b.c.d_0.:Int64, json.a.r[].b.c.d_1.:Int64 from test format Null; +select json.a.r[].c.d.e.:`Array(Nullable(Int64))`, json.a.r[].b.c.d_0.:Int64, json.a.r[].b.c.d_1.:Int64 from test order by id format Null; +select json.a.r, json.a.r[].c.d.e, json.a.r[].b.c.d_0, json.a.r[].b.c.d_1 from test format Null; +select json.a.r, json.a.r[].c.d.e, json.a.r[].b.c.d_0, json.a.r[].b.c.d_1 from test order by id format Null; +select json.a.r, json.a.r[].c.d.e.:`Array(Nullable(Int64))`, json.a.r[].b.c.d_0.:Int64, json.a.r[].b.c.d_1.:Int64 from test format Null; +select json.a.r, json.a.r[].c.d.e.:`Array(Nullable(Int64))`, json.a.r[].b.c.d_0.:Int64, json.a.r[].b.c.d_1.:Int64 from test order by id format Null; + +select count() from test where empty(json.a.r[].^b) and empty(json.a.r[].^b.c) and empty(json.a.r[].b.c.d_0); +select count() from test where empty(json.a.r[].^b) and empty(json.a.r[].^b.c) and empty(json.a.r[].b.c.d_0.:Int64); +select count() from test where empty(arrayJoin(json.a.r[].^b)) and empty(arrayJoin(json.a.r[].^b.c)) and arrayJoin(json.a.r[].b.c.d_0) is null; +select count() from test where empty(arrayJoin(json.a.r[].^b)) and empty(arrayJoin(json.a.r[].^b.c)) and arrayJoin(json.a.r[].b.c.d_0.:Int64) is null; + +select json.a.r[].^b, json.a.r[].^b.c, json.a.r[].b.c.d_0 from test format Null; +select json.a.r[].^b, json.a.r[].^b.c, json.a.r[].b.c.d_0 from test order by id format Null; +select json.a.r[].^b, json.a.r[].^b.c, json.a.r[].b.c.d_0.:Int64 from test format Null; +select json.a.r[].^b, json.a.r[].^b.c, json.a.r[].b.c.d_0.:Int64 from test order by id format Null; +select json.a.r, json.a.r[].^b, json.a.r[].^b.c, json.a.r[].b.c.d_0 from test format Null; +select json.a.r, json.a.r[].^b, json.a.r[].^b.c, json.a.r[].b.c.d_0 from test order by id format Null; +select json.a.r, json.a.r[].^b, json.a.r[].^b.c, json.a.r[].b.c.d_0.:Int64 from test format Null; +select json.a.r, json.a.r[].^b, json.a.r[].^b.c, json.a.r[].b.c.d_0.:Int64 from test order by id format Null; + +drop table test; diff --git a/tests/queries/0_stateless/03208_array_of_json_read_subcolumns_2_wide_merge_tree.reference.j2 b/tests/queries/0_stateless/03208_array_of_json_read_subcolumns_2_wide_merge_tree.reference.j2 new file mode 100644 index 00000000000..2fd3437e3d2 --- /dev/null +++ b/tests/queries/0_stateless/03208_array_of_json_read_subcolumns_2_wide_merge_tree.reference.j2 @@ -0,0 +1,60 @@ +('a.a1','String') +('a.a2','String') +('a.a3','String') +('a.a4','String') +('a.a5','String') +('a.a6','String') +('a.a7','String') +('a.a8','String') +('a.b','Array(JSON)') +('a.r','Array(JSON(max_dynamic_types=16, max_dynamic_paths=2))') +('b.c.d_0','Int64') +('b.c.d_1','Int64') +('b.c.d_2','Int64') +('b.c.d_3','Int64') +('b.c.d_4','Int64') +('c.d.e','Array(Nullable(Int64))') +('b.c.d_0','Int64') +('b.c.d_1','Int64') +('b.c.d_2','Int64') +('b.c.d_3','Int64') +('b.c.d_4','Int64') +('c.d.e','Array(Nullable(Int64))') +20000 +20000 +0 +0 +20000 +20000 +0 +0 +('a.a1','String') +('a.a2','String') +('a.a3','String') +('a.a4','String') +('a.a5','String') +('a.a6','String') +('a.a7','String') +('a.a8','String') +('a.b','Array(JSON)') +('a.r','Array(JSON(max_dynamic_types=16, max_dynamic_paths=2))') +('b.c.d_0','Int64') +('b.c.d_1','Int64') +('b.c.d_2','Int64') +('b.c.d_3','Int64') +('b.c.d_4','Int64') +('c.d.e','Array(Nullable(Int64))') +('b.c.d_0','Int64') +('b.c.d_1','Int64') +('b.c.d_2','Int64') +('b.c.d_3','Int64') +('b.c.d_4','Int64') +('c.d.e','Array(Nullable(Int64))') +20000 +20000 +0 +0 +20000 +20000 +0 +0 diff --git a/tests/queries/0_stateless/03208_array_of_json_read_subcolumns_2_wide_merge_tree.sql.j2 b/tests/queries/0_stateless/03208_array_of_json_read_subcolumns_2_wide_merge_tree.sql.j2 new file mode 100644 index 00000000000..3010fa0e2de --- /dev/null +++ b/tests/queries/0_stateless/03208_array_of_json_read_subcolumns_2_wide_merge_tree.sql.j2 @@ -0,0 +1,57 @@ +-- Tags: no-fasttest, long, no-debug, no-tsan, no-asan, no-msan, no-ubsan + +set allow_experimental_json_type = 1; +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; + +create table test (id UInt64, json JSON(max_dynamic_paths=8, a.b Array(JSON))) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1; + +insert into test select number, '{}' from numbers(10000); +insert into test select number, toJSONString(map('a.b', arrayMap(x -> map('b.c.d_' || toString(x), number::UInt32, 'c.d.e', range((number + x) % 5 + 1)), range(number % 5 + 1)))) from numbers(10000, 10000); +insert into test select number, toJSONString(map('a.r', arrayMap(x -> map('b.c.d_' || toString(x), number::UInt32, 'c.d.e', range((number + x) % 5 + 1)), range(number % 5 + 1)))) from numbers(20000, 10000); +insert into test select number, toJSONString(map('a.a1', number, 'a.a2', number, 'a.a3', number, 'a.a4', number, 'a.a5', number, 'a.a6', number, 'a.a7', number, 'a.a8', number, 'a.r', arrayMap(x -> map('b.c.d_' || toString(x), number::UInt32, 'c.d.e', range((number + x) % 5 + 1)), range(number % 5 + 1)))) from numbers(30000, 10000); + +{% for merge_command in ['system stop merges test', 'system start merges test'] -%} + +{{ merge_command }}; + +select distinct arrayJoin(JSONAllPathsWithTypes(json)) as paths_with_types from test order by paths_with_types; +select distinct arrayJoin(JSONAllPathsWithTypes(arrayJoin(json.a.b))) as paths_with_types from test order by paths_with_types; +select distinct arrayJoin(JSONAllPathsWithTypes(arrayJoin(json.a.r[]))) as paths_with_types from test order by paths_with_types; + +select json, json.a.b, json.a.b.c, json.a.b.c.d.e, json.a.b.b.c.d_0, json.a.b.b.c.d_1, json.a.b.b.c.d_2, json.a.b.b.c.d_3, json.a.b.b.c.d_4, json.a.r, json.a.r[], json.a.r[].c.d.e, json.a.r[].b.c.d_0, json.a.r[].b.c.d_1, json.a.r[].b.c.d_2, json.a.r[].b.c.d_3, json.a.r[].b.c.d_4, json.^a, json.a.b.^b.c, json.a.r[].^b.c from test format Null; +select json, json.a.b, json.a.b.c, json.a.b.c.d.e, json.a.b.b.c.d_0, json.a.b.b.c.d_1, json.a.b.b.c.d_2, json.a.b.b.c.d_3, json.a.b.b.c.d_4, json.a.r, json.a.r[], json.a.r[].c.d.e, json.a.r[].b.c.d_0, json.a.r[].b.c.d_1, json.a.r[].b.c.d_2, json.a.r[].b.c.d_3, json.a.r[].b.c.d_4, json.^a, json.a.b.^b.c, json.a.r[].^b.c from test order by id format Null; +select json.a.b, json.a.b.c, json.a.b.c.d.e, json.a.b.b.c.d_0, json.a.b.b.c.d_1, json.a.b.b.c.d_2, json.a.b.b.c.d_3, json.a.b.b.c.d_4, json.a.r, json.a.r[], json.a.r[].c.d.e, json.a.r[].b.c.d_0, json.a.r[].b.c.d_1, json.a.r[].b.c.d_2, json.a.r[].b.c.d_3, json.a.r[].b.c.d_4, json.^a, json.a.b.^b.c, json.a.r[].^b.c from test format Null; +select json.a.b, json.a.b.c, json.a.b.c.d.e, json.a.b.b.c.d_0, json.a.b.b.c.d_1, json.a.b.b.c.d_2, json.a.b.b.c.d_3, json.a.b.b.c.d_4, json.a.r, json.a.r[], json.a.r[].c.d.e, json.a.r[].b.c.d_0, json.a.r[].b.c.d_1, json.a.r[].b.c.d_2, json.a.r[].b.c.d_3, json.a.r[].b.c.d_4, json.^a, json.a.b.^b.c, json.a.r[].^b.c from test order by id format Null; + +select count() from test where empty(json.a.r[].c.d.e) and empty(json.a.r[].b.c.d_0) and empty(json.a.r[].b.c.d_1); +select count() from test where empty(json.a.r[].c.d.e.:`Array(Nullable(Int64))`) and empty(json.a.r[].b.c.d_0.:Int64) and empty(json.a.r[].b.c.d_1.:Int64); +select count() from test where arrayJoin(json.a.r[].c.d.e) is null and arrayJoin(json.a.r[].b.c.d_0) is null and arrayJoin(json.a.r[].b.c.d_1) is null; +select count() from test where arrayJoin(json.a.r[].c.d.e.:`Array(Nullable(Int64))`) is null and arrayJoin(json.a.r[].b.c.d_0.:Int64) is null and arrayJoin(json.a.r[].b.c.d_1.:Int64) is null; + +select json.a.r[].c.d.e, json.a.r[].b.c.d_0, json.a.r[].b.c.d_1 from test format Null; +select json.a.r[].c.d.e, json.a.r[].b.c.d_0, json.a.r[].b.c.d_1 from test order by id format Null; +select json.a.r[].c.d.e.:`Array(Nullable(Int64))`, json.a.r[].b.c.d_0.:Int64, json.a.r[].b.c.d_1.:Int64 from test format Null; +select json.a.r[].c.d.e.:`Array(Nullable(Int64))`, json.a.r[].b.c.d_0.:Int64, json.a.r[].b.c.d_1.:Int64 from test order by id format Null; +select json.a.r, json.a.r[].c.d.e, json.a.r[].b.c.d_0, json.a.r[].b.c.d_1 from test format Null; +select json.a.r, json.a.r[].c.d.e, json.a.r[].b.c.d_0, json.a.r[].b.c.d_1 from test order by id format Null; +select json.a.r, json.a.r[].c.d.e.:`Array(Nullable(Int64))`, json.a.r[].b.c.d_0.:Int64, json.a.r[].b.c.d_1.:Int64 from test format Null; +select json.a.r, json.a.r[].c.d.e.:`Array(Nullable(Int64))`, json.a.r[].b.c.d_0.:Int64, json.a.r[].b.c.d_1.:Int64 from test order by id format Null; + +select count() from test where empty(json.a.r[].^b) and empty(json.a.r[].^b.c) and empty(json.a.r[].b.c.d_0); +select count() from test where empty(json.a.r[].^b) and empty(json.a.r[].^b.c) and empty(json.a.r[].b.c.d_0.:Int64); +select count() from test where empty(arrayJoin(json.a.r[].^b)) and empty(arrayJoin(json.a.r[].^b.c)) and arrayJoin(json.a.r[].b.c.d_0) is null; +select count() from test where empty(arrayJoin(json.a.r[].^b)) and empty(arrayJoin(json.a.r[].^b.c)) and arrayJoin(json.a.r[].b.c.d_0.:Int64) is null; + +select json.a.r[].^b, json.a.r[].^b.c, json.a.r[].b.c.d_0 from test format Null; +select json.a.r[].^b, json.a.r[].^b.c, json.a.r[].b.c.d_0 from test order by id format Null; +select json.a.r[].^b, json.a.r[].^b.c, json.a.r[].b.c.d_0.:Int64 from test format Null; +select json.a.r[].^b, json.a.r[].^b.c, json.a.r[].b.c.d_0.:Int64 from test order by id format Null; +select json.a.r, json.a.r[].^b, json.a.r[].^b.c, json.a.r[].b.c.d_0 from test format Null; +select json.a.r, json.a.r[].^b, json.a.r[].^b.c, json.a.r[].b.c.d_0 from test order by id format Null; +select json.a.r, json.a.r[].^b, json.a.r[].^b.c, json.a.r[].b.c.d_0.:Int64 from test format Null; +select json.a.r, json.a.r[].^b, json.a.r[].^b.c, json.a.r[].b.c.d_0.:Int64 from test order by id format Null; + +{% endfor -%} + +drop table test; diff --git a/tests/queries/0_stateless/03209_json_type_horizontal_merges.reference.j2 b/tests/queries/0_stateless/03209_json_type_horizontal_merges.reference.j2 new file mode 100644 index 00000000000..ea4e1da7181 --- /dev/null +++ b/tests/queries/0_stateless/03209_json_type_horizontal_merges.reference.j2 @@ -0,0 +1,136 @@ +create table test (id UInt64, json JSON(max_dynamic_paths=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_columns_to_activate=10; +Dynamic paths +100000 a +90000 b +80000 c +70000 d +60000 e +Shared data paths +Dynamic paths +100000 a +90000 b +80000 c +Shared data paths +70000 d +60000 e +Dynamic paths +200000 f +100000 a +90000 b +80000 c +Shared data paths +70000 d +60000 e +Dynamic paths +200000 f +100000 a +90000 b +Shared data paths +80000 c +70000 d +60000 e +Dynamic paths +200000 f +100000 a +90000 b +10000 g +Shared data paths +80000 c +70000 d +60000 e +Dynamic paths +200000 f +100000 a +90000 b +Shared data paths +80000 c +70000 d +60000 e +10000 g +Dynamic paths +200000 f +100000 a +90000 b +40000 c +Shared data paths +80000 c +70000 d +60000 e +10000 g +Dynamic paths +200000 f +120000 c +100000 a +Shared data paths +90000 b +70000 d +60000 e +10000 g +create table test (id UInt64, json JSON(max_dynamic_paths=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_columns_to_activate=10; +Dynamic paths +100000 a +90000 b +80000 c +70000 d +60000 e +Shared data paths +Dynamic paths +100000 a +90000 b +80000 c +Shared data paths +70000 d +60000 e +Dynamic paths +200000 f +100000 a +90000 b +80000 c +Shared data paths +70000 d +60000 e +Dynamic paths +200000 f +100000 a +90000 b +Shared data paths +80000 c +70000 d +60000 e +Dynamic paths +200000 f +100000 a +90000 b +10000 g +Shared data paths +80000 c +70000 d +60000 e +Dynamic paths +200000 f +100000 a +90000 b +Shared data paths +80000 c +70000 d +60000 e +10000 g +Dynamic paths +200000 f +100000 a +90000 b +40000 c +Shared data paths +80000 c +70000 d +60000 e +10000 g +Dynamic paths +200000 f +120000 c +100000 a +Shared data paths +90000 b +70000 d +60000 e +10000 g diff --git a/tests/queries/0_stateless/03209_json_type_horizontal_merges.sql.j2 b/tests/queries/0_stateless/03209_json_type_horizontal_merges.sql.j2 new file mode 100644 index 00000000000..cc143e4ceef --- /dev/null +++ b/tests/queries/0_stateless/03209_json_type_horizontal_merges.sql.j2 @@ -0,0 +1,74 @@ +-- Tags: no-fasttest, long, no-debug, no-tsan, no-asan, no-msan, no-ubsan + +set allow_experimental_json_type = 1; + +drop table if exists test; + +{% for create_command in ['create table test (id UInt64, json JSON(max_dynamic_paths=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_columns_to_activate=10;', + 'create table test (id UInt64, json JSON(max_dynamic_paths=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_columns_to_activate=10;'] -%} + +select '{{ create_command }}'; + +{{ create_command }} + +system stop merges test; +insert into test select number, toJSONString(map('a', number)) from numbers(100000); +insert into test select number, toJSONString(map('b', number)) from numbers(90000); +insert into test select number, toJSONString(map('c', number)) from numbers(80000); +insert into test select number, toJSONString(map('d', number)) from numbers(70000); +insert into test select number, toJSONString(map('e', number)) from numbers(60000); +insert into test select number, '{}' from numbers(100000); + +select 'Dynamic paths'; +select count(), arrayJoin(JSONDynamicPaths(json)) as path from test group by path order by count() desc, path; +select 'Shared data paths'; +select count(), arrayJoin(JSONSharedDataPaths(json)) as path from test group by path order by count() desc, path; +system start merges test; +optimize table test final; +select 'Dynamic paths'; +select count(), arrayJoin(JSONDynamicPaths(json)) as path from test group by path order by count() desc, path; +select 'Shared data paths'; +select count(), arrayJoin(JSONSharedDataPaths(json)) as path from test group by path order by count() desc, path; + +system stop merges test; +insert into test select number, toJSONString(map('f', number)) from numbers(200000); +select 'Dynamic paths'; +select count(), arrayJoin(JSONDynamicPaths(json)) as path from test group by path order by count() desc, path; +select 'Shared data paths'; +select count(), arrayJoin(JSONSharedDataPaths(json)) as path from test group by path order by count() desc, path; +system start merges test; +optimize table test final; +select 'Dynamic paths'; +select count(), arrayJoin(JSONDynamicPaths(json)) as path from test group by path order by count() desc, path; +select 'Shared data paths'; +select count(), arrayJoin(JSONSharedDataPaths(json)) as path from test group by path order by count() desc, path; + +system stop merges test; +insert into test select number, toJSONString(map('g', number)) from numbers(10000); +select 'Dynamic paths'; +select count(), arrayJoin(JSONDynamicPaths(json)) as path from test group by path order by count() desc, path; +select 'Shared data paths'; +select count(), arrayJoin(JSONSharedDataPaths(json)) as path from test group by path order by count() desc, path; +system start merges test; +optimize table test final; +select 'Dynamic paths'; +select count(), arrayJoin(JSONDynamicPaths(json)) as path from test group by path order by count() desc, path; +select 'Shared data paths'; +select count(), arrayJoin(JSONSharedDataPaths(json)) as path from test group by path order by count() desc, path; + +system stop merges test; +insert into test select number, toJSONString(map('c', number)) from numbers(40000); +select 'Dynamic paths'; +select count(), arrayJoin(JSONDynamicPaths(json)) as path from test group by path order by count() desc, path; +select 'Shared data paths'; +select count(), arrayJoin(JSONSharedDataPaths(json)) as path from test group by path order by count() desc, path; +system start merges test; +optimize table test final; +select 'Dynamic paths'; +select count(), arrayJoin(JSONDynamicPaths(json)) as path from test group by path order by count() desc, path; +select 'Shared data paths'; +select count(), arrayJoin(JSONSharedDataPaths(json)) as path from test group by path order by count() desc, path; + +drop table test; + +{% endfor -%} diff --git a/tests/queries/0_stateless/03209_json_type_merges_small.reference.j2 b/tests/queries/0_stateless/03209_json_type_merges_small.reference.j2 new file mode 100644 index 00000000000..f953dee10fe --- /dev/null +++ b/tests/queries/0_stateless/03209_json_type_merges_small.reference.j2 @@ -0,0 +1,272 @@ +create table test (id UInt64, json JSON(max_dynamic_paths=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_columns_to_activate=10; +Dynamic paths +10 a +9 b +8 c +7 d +6 e +Shared data paths +Dynamic paths +10 a +9 b +8 c +Shared data paths +7 d +6 e +Dynamic paths +20 f +10 a +9 b +8 c +Shared data paths +7 d +6 e +Dynamic paths +20 f +10 a +9 b +Shared data paths +8 c +7 d +6 e +Dynamic paths +20 f +10 a +9 b +1 g +Shared data paths +8 c +7 d +6 e +Dynamic paths +20 f +10 a +9 b +Shared data paths +8 c +7 d +6 e +1 g +Dynamic paths +20 f +10 a +9 b +4 c +Shared data paths +8 c +7 d +6 e +1 g +Dynamic paths +20 f +12 c +10 a +Shared data paths +9 b +7 d +6 e +1 g +create table test (id UInt64, json JSON(max_dynamic_paths=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_columns_to_activate=10; +Dynamic paths +10 a +9 b +8 c +7 d +6 e +Shared data paths +Dynamic paths +10 a +9 b +8 c +Shared data paths +7 d +6 e +Dynamic paths +20 f +10 a +9 b +8 c +Shared data paths +7 d +6 e +Dynamic paths +20 f +10 a +9 b +Shared data paths +8 c +7 d +6 e +Dynamic paths +20 f +10 a +9 b +1 g +Shared data paths +8 c +7 d +6 e +Dynamic paths +20 f +10 a +9 b +Shared data paths +8 c +7 d +6 e +1 g +Dynamic paths +20 f +10 a +9 b +4 c +Shared data paths +8 c +7 d +6 e +1 g +Dynamic paths +20 f +12 c +10 a +Shared data paths +9 b +7 d +6 e +1 g +create table test (id UInt64, json JSON(max_dynamic_paths=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_columns_to_activate=1; +Dynamic paths +10 a +9 b +8 c +7 d +6 e +Shared data paths +Dynamic paths +10 a +9 b +8 c +Shared data paths +7 d +6 e +Dynamic paths +20 f +10 a +9 b +8 c +Shared data paths +7 d +6 e +Dynamic paths +20 f +10 a +9 b +Shared data paths +8 c +7 d +6 e +Dynamic paths +20 f +10 a +9 b +1 g +Shared data paths +8 c +7 d +6 e +Dynamic paths +20 f +10 a +9 b +Shared data paths +8 c +7 d +6 e +1 g +Dynamic paths +20 f +10 a +9 b +4 c +Shared data paths +8 c +7 d +6 e +1 g +Dynamic paths +20 f +12 c +10 a +Shared data paths +9 b +7 d +6 e +1 g +create table test (id UInt64, json JSON(max_dynamic_paths=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_columns_to_activate=1; +Dynamic paths +10 a +9 b +8 c +7 d +6 e +Shared data paths +Dynamic paths +10 a +9 b +8 c +Shared data paths +7 d +6 e +Dynamic paths +20 f +10 a +9 b +8 c +Shared data paths +7 d +6 e +Dynamic paths +20 f +10 a +9 b +Shared data paths +8 c +7 d +6 e +Dynamic paths +20 f +10 a +9 b +1 g +Shared data paths +8 c +7 d +6 e +Dynamic paths +20 f +10 a +9 b +Shared data paths +8 c +7 d +6 e +1 g +Dynamic paths +20 f +10 a +9 b +4 c +Shared data paths +8 c +7 d +6 e +1 g +Dynamic paths +20 f +12 c +10 a +Shared data paths +9 b +7 d +6 e +1 g diff --git a/tests/queries/0_stateless/03209_json_type_merges_small.sql.j2 b/tests/queries/0_stateless/03209_json_type_merges_small.sql.j2 new file mode 100644 index 00000000000..e4b64ac7561 --- /dev/null +++ b/tests/queries/0_stateless/03209_json_type_merges_small.sql.j2 @@ -0,0 +1,76 @@ +-- Tags: no-fasttest, long, no-debug, no-tsan, no-asan, no-msan, no-ubsan + +set allow_experimental_json_type = 1; + +drop table if exists test; + +{% for create_command in ['create table test (id UInt64, json JSON(max_dynamic_paths=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_columns_to_activate=10;', + 'create table test (id UInt64, json JSON(max_dynamic_paths=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_columns_to_activate=10;', + 'create table test (id UInt64, json JSON(max_dynamic_paths=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_columns_to_activate=1;', + 'create table test (id UInt64, json JSON(max_dynamic_paths=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_columns_to_activate=1;'] -%} + +select '{{ create_command }}'; + +{{ create_command }} + +system stop merges test; +insert into test select number, toJSONString(map('a', number)) from numbers(10); +insert into test select number, toJSONString(map('b', number)) from numbers(9); +insert into test select number, toJSONString(map('c', number)) from numbers(8); +insert into test select number, toJSONString(map('d', number)) from numbers(7); +insert into test select number, toJSONString(map('e', number)) from numbers(6); +insert into test select number, '{}' from numbers(100000); + +select 'Dynamic paths'; +select count(), arrayJoin(JSONDynamicPaths(json)) as path from test group by path order by count() desc, path; +select 'Shared data paths'; +select count(), arrayJoin(JSONSharedDataPaths(json)) as path from test group by path order by count() desc, path; +system start merges test; +optimize table test final; +select 'Dynamic paths'; +select count(), arrayJoin(JSONDynamicPaths(json)) as path from test group by path order by count() desc, path; +select 'Shared data paths'; +select count(), arrayJoin(JSONSharedDataPaths(json)) as path from test group by path order by count() desc, path; + +system stop merges test; +insert into test select number, toJSONString(map('f', number)) from numbers(20); +select 'Dynamic paths'; +select count(), arrayJoin(JSONDynamicPaths(json)) as path from test group by path order by count() desc, path; +select 'Shared data paths'; +select count(), arrayJoin(JSONSharedDataPaths(json)) as path from test group by path order by count() desc, path; +system start merges test; +optimize table test final; +select 'Dynamic paths'; +select count(), arrayJoin(JSONDynamicPaths(json)) as path from test group by path order by count() desc, path; +select 'Shared data paths'; +select count(), arrayJoin(JSONSharedDataPaths(json)) as path from test group by path order by count() desc, path; + +system stop merges test; +insert into test select number, toJSONString(map('g', number)) from numbers(1); +select 'Dynamic paths'; +select count(), arrayJoin(JSONDynamicPaths(json)) as path from test group by path order by count() desc, path; +select 'Shared data paths'; +select count(), arrayJoin(JSONSharedDataPaths(json)) as path from test group by path order by count() desc, path; +system start merges test; +optimize table test final; +select 'Dynamic paths'; +select count(), arrayJoin(JSONDynamicPaths(json)) as path from test group by path order by count() desc, path; +select 'Shared data paths'; +select count(), arrayJoin(JSONSharedDataPaths(json)) as path from test group by path order by count() desc, path; + +system stop merges test; +insert into test select number, toJSONString(map('c', number)) from numbers(4); +select 'Dynamic paths'; +select count(), arrayJoin(JSONDynamicPaths(json)) as path from test group by path order by count() desc, path; +select 'Shared data paths'; +select count(), arrayJoin(JSONSharedDataPaths(json)) as path from test group by path order by count() desc, path; +system start merges test; +optimize table test final; +select 'Dynamic paths'; +select count(), arrayJoin(JSONDynamicPaths(json)) as path from test group by path order by count() desc, path; +select 'Shared data paths'; +select count(), arrayJoin(JSONSharedDataPaths(json)) as path from test group by path order by count() desc, path; + +drop table test; + +{% endfor -%} diff --git a/tests/queries/0_stateless/03209_json_type_vertical_merges.reference.j2 b/tests/queries/0_stateless/03209_json_type_vertical_merges.reference.j2 new file mode 100644 index 00000000000..d292b1454c6 --- /dev/null +++ b/tests/queries/0_stateless/03209_json_type_vertical_merges.reference.j2 @@ -0,0 +1,136 @@ +create table test (id UInt64, json JSON(max_dynamic_paths=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_columns_to_activate=1; +Dynamic paths +100000 a +90000 b +80000 c +70000 d +60000 e +Shared data paths +Dynamic paths +100000 a +90000 b +80000 c +Shared data paths +70000 d +60000 e +Dynamic paths +200000 f +100000 a +90000 b +80000 c +Shared data paths +70000 d +60000 e +Dynamic paths +200000 f +100000 a +90000 b +Shared data paths +80000 c +70000 d +60000 e +Dynamic paths +200000 f +100000 a +90000 b +10000 g +Shared data paths +80000 c +70000 d +60000 e +Dynamic paths +200000 f +100000 a +90000 b +Shared data paths +80000 c +70000 d +60000 e +10000 g +Dynamic paths +200000 f +100000 a +90000 b +40000 c +Shared data paths +80000 c +70000 d +60000 e +10000 g +Dynamic paths +200000 f +120000 c +100000 a +Shared data paths +90000 b +70000 d +60000 e +10000 g +create table test (id UInt64, json JSON(max_dynamic_paths=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_columns_to_activate=1; +Dynamic paths +100000 a +90000 b +80000 c +70000 d +60000 e +Shared data paths +Dynamic paths +100000 a +90000 b +80000 c +Shared data paths +70000 d +60000 e +Dynamic paths +200000 f +100000 a +90000 b +80000 c +Shared data paths +70000 d +60000 e +Dynamic paths +200000 f +100000 a +90000 b +Shared data paths +80000 c +70000 d +60000 e +Dynamic paths +200000 f +100000 a +90000 b +10000 g +Shared data paths +80000 c +70000 d +60000 e +Dynamic paths +200000 f +100000 a +90000 b +Shared data paths +80000 c +70000 d +60000 e +10000 g +Dynamic paths +200000 f +100000 a +90000 b +40000 c +Shared data paths +80000 c +70000 d +60000 e +10000 g +Dynamic paths +200000 f +120000 c +100000 a +Shared data paths +90000 b +70000 d +60000 e +10000 g diff --git a/tests/queries/0_stateless/03209_json_type_vertical_merges.sql.j2 b/tests/queries/0_stateless/03209_json_type_vertical_merges.sql.j2 new file mode 100644 index 00000000000..e427db7677f --- /dev/null +++ b/tests/queries/0_stateless/03209_json_type_vertical_merges.sql.j2 @@ -0,0 +1,74 @@ +-- Tags: no-fasttest, long, no-debug, no-tsan, no-asan, no-msan, no-ubsan + +set allow_experimental_json_type = 1; + +drop table if exists test; + +{% for create_command in ['create table test (id UInt64, json JSON(max_dynamic_paths=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_columns_to_activate=1;', + 'create table test (id UInt64, json JSON(max_dynamic_paths=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_columns_to_activate=1;'] -%} + +select '{{ create_command }}'; + +{{ create_command }} + +system stop merges test; +insert into test select number, toJSONString(map('a', number)) from numbers(100000); +insert into test select number, toJSONString(map('b', number)) from numbers(90000); +insert into test select number, toJSONString(map('c', number)) from numbers(80000); +insert into test select number, toJSONString(map('d', number)) from numbers(70000); +insert into test select number, toJSONString(map('e', number)) from numbers(60000); +insert into test select number, '{}' from numbers(100000); + +select 'Dynamic paths'; +select count(), arrayJoin(JSONDynamicPaths(json)) as path from test group by path order by count() desc, path; +select 'Shared data paths'; +select count(), arrayJoin(JSONSharedDataPaths(json)) as path from test group by path order by count() desc, path; +system start merges test; +optimize table test final; +select 'Dynamic paths'; +select count(), arrayJoin(JSONDynamicPaths(json)) as path from test group by path order by count() desc, path; +select 'Shared data paths'; +select count(), arrayJoin(JSONSharedDataPaths(json)) as path from test group by path order by count() desc, path; + +system stop merges test; +insert into test select number, toJSONString(map('f', number)) from numbers(200000); +select 'Dynamic paths'; +select count(), arrayJoin(JSONDynamicPaths(json)) as path from test group by path order by count() desc, path; +select 'Shared data paths'; +select count(), arrayJoin(JSONSharedDataPaths(json)) as path from test group by path order by count() desc, path; +system start merges test; +optimize table test final; +select 'Dynamic paths'; +select count(), arrayJoin(JSONDynamicPaths(json)) as path from test group by path order by count() desc, path; +select 'Shared data paths'; +select count(), arrayJoin(JSONSharedDataPaths(json)) as path from test group by path order by count() desc, path; + +system stop merges test; +insert into test select number, toJSONString(map('g', number)) from numbers(10000); +select 'Dynamic paths'; +select count(), arrayJoin(JSONDynamicPaths(json)) as path from test group by path order by count() desc, path; +select 'Shared data paths'; +select count(), arrayJoin(JSONSharedDataPaths(json)) as path from test group by path order by count() desc, path; +system start merges test; +optimize table test final; +select 'Dynamic paths'; +select count(), arrayJoin(JSONDynamicPaths(json)) as path from test group by path order by count() desc, path; +select 'Shared data paths'; +select count(), arrayJoin(JSONSharedDataPaths(json)) as path from test group by path order by count() desc, path; + +system stop merges test; +insert into test select number, toJSONString(map('c', number)) from numbers(40000); +select 'Dynamic paths'; +select count(), arrayJoin(JSONDynamicPaths(json)) as path from test group by path order by count() desc, path; +select 'Shared data paths'; +select count(), arrayJoin(JSONSharedDataPaths(json)) as path from test group by path order by count() desc, path; +system start merges test; +optimize table test final; +select 'Dynamic paths'; +select count(), arrayJoin(JSONDynamicPaths(json)) as path from test group by path order by count() desc, path; +select 'Shared data paths'; +select count(), arrayJoin(JSONSharedDataPaths(json)) as path from test group by path order by count() desc, path; + +drop table test; + +{% endfor -%} diff --git a/tests/queries/0_stateless/03209_parallel_replicas_lost_decimal_conversion.reference b/tests/queries/0_stateless/03209_parallel_replicas_lost_decimal_conversion.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03209_parallel_replicas_lost_decimal_conversion.sql b/tests/queries/0_stateless/03209_parallel_replicas_lost_decimal_conversion.sql new file mode 100644 index 00000000000..bcc9dec306b --- /dev/null +++ b/tests/queries/0_stateless/03209_parallel_replicas_lost_decimal_conversion.sql @@ -0,0 +1,11 @@ +DROP TABLE IF EXISTS t_03209 SYNC; + +CREATE TABLE t_03209 ( `a` Decimal(18, 0), `b` Decimal(18, 1), `c` Decimal(36, 0) ) ENGINE = ReplicatedMergeTree('/clickhouse/{database}/test_03209', 'r1') ORDER BY tuple(); +INSERT INTO t_03209 VALUES ('33', '44.4', '35'); + +SET max_parallel_replicas = 2, cluster_for_parallel_replicas='parallel_replicas'; + +SELECT * FROM t_03209 WHERE a IN toDecimal32('33.3000', 4) SETTINGS allow_experimental_parallel_reading_from_replicas=0; +SELECT * FROM t_03209 WHERE a IN toDecimal32('33.3000', 4) SETTINGS allow_experimental_parallel_reading_from_replicas=1; + +DROP TABLE t_03209 SYNC; diff --git a/tests/queries/0_stateless/03210_dynamic_squashing.reference b/tests/queries/0_stateless/03210_dynamic_squashing.reference new file mode 100644 index 00000000000..1c23c22f550 --- /dev/null +++ b/tests/queries/0_stateless/03210_dynamic_squashing.reference @@ -0,0 +1,12 @@ +1 +Array(UInt8) true +None false +UInt64 false +2 +Array(UInt8) true +None false +UInt64 false +3 +Array(UInt8) true +String false +UInt64 true diff --git a/tests/queries/0_stateless/03210_dynamic_squashing.sql b/tests/queries/0_stateless/03210_dynamic_squashing.sql new file mode 100644 index 00000000000..71d09263fda --- /dev/null +++ b/tests/queries/0_stateless/03210_dynamic_squashing.sql @@ -0,0 +1,25 @@ +-- Tags: long + +set allow_experimental_dynamic_type = 1; +set max_block_size = 1000; + +drop table if exists test; + +create table test (d Dynamic) engine=MergeTree order by tuple(); +insert into test select multiIf(number < 1000, NULL::Dynamic(max_types=1), number < 3000, range(number % 5)::Dynamic(max_types=1), number::Dynamic(max_types=1)) from numbers(100000); +select '1'; +select distinct dynamicType(d) as type, isDynamicElementInSharedData(d) as flag from test order by type; + +drop table test; +create table test (d Dynamic(max_types=1)) engine=MergeTree order by tuple(); +insert into test select multiIf(number < 1000, NULL::Dynamic(max_types=1), number < 3000, range(number % 5)::Dynamic(max_types=1), number::Dynamic(max_types=1)) from numbers(100000); +select '2'; +select distinct dynamicType(d) as type, isDynamicElementInSharedData(d) as flag from test order by type; + +truncate table test; +insert into test select multiIf(number < 1000, 'Str'::Dynamic(max_types=1), number < 3000, range(number % 5)::Dynamic(max_types=1), number::Dynamic(max_types=1)) from numbers(100000); +select '3'; +select distinct dynamicType(d) as type, isDynamicElementInSharedData(d) as flag from test order by type; + +drop table test; + diff --git a/tests/queries/0_stateless/03210_json_type_alter_add_column.reference.j2 b/tests/queries/0_stateless/03210_json_type_alter_add_column.reference.j2 new file mode 100644 index 00000000000..37b6854938a --- /dev/null +++ b/tests/queries/0_stateless/03210_json_type_alter_add_column.reference.j2 @@ -0,0 +1,72 @@ +initial insert +alter add column 1 +0 {} \N {} \N \N +1 {} \N {} \N \N +2 {} \N {} \N \N +insert after alter add column +3 a.b +3 b.c +3 c.d +0 {} \N {} \N \N +1 {} \N {} \N \N +2 {} \N {} \N \N +3 {"a":{"b":"3"}} 3 {"b":"3"} \N \N +4 {"a":{"b":"4"}} 4 {"b":"4"} \N \N +5 {"a":{"b":"5"}} 5 {"b":"5"} \N \N +6 {"b":{"c":"6"}} \N {} 6 \N +7 {"b":{"c":"7"}} \N {} 7 \N +8 {"b":{"c":"8"}} \N {} 8 \N +9 {"c":{"d":"9"}} \N {} \N 9 +10 {"c":{"d":"10"}} \N {} \N 10 +11 {"c":{"d":"11"}} \N {} \N 11 +12 {} \N {} \N \N +13 {} \N {} \N \N +14 {} \N {} \N \N +initial insert +alter add column 1 +0 {} \N {} \N \N +1 {} \N {} \N \N +2 {} \N {} \N \N +insert after alter add column +3 a.b +3 b.c +3 c.d +0 {} \N {} \N \N +1 {} \N {} \N \N +2 {} \N {} \N \N +3 {"a":{"b":"3"}} 3 {"b":"3"} \N \N +4 {"a":{"b":"4"}} 4 {"b":"4"} \N \N +5 {"a":{"b":"5"}} 5 {"b":"5"} \N \N +6 {"b":{"c":"6"}} \N {} 6 \N +7 {"b":{"c":"7"}} \N {} 7 \N +8 {"b":{"c":"8"}} \N {} 8 \N +9 {"c":{"d":"9"}} \N {} \N 9 +10 {"c":{"d":"10"}} \N {} \N 10 +11 {"c":{"d":"11"}} \N {} \N 11 +12 {} \N {} \N \N +13 {} \N {} \N \N +14 {} \N {} \N \N +initial insert +alter add column 1 +0 {} \N {} \N \N +1 {} \N {} \N \N +2 {} \N {} \N \N +insert after alter add column +3 a.b +3 b.c +3 c.d +0 {} \N {} \N \N +1 {} \N {} \N \N +2 {} \N {} \N \N +3 {"a":{"b":"3"}} 3 {"b":"3"} \N \N +4 {"a":{"b":"4"}} 4 {"b":"4"} \N \N +5 {"a":{"b":"5"}} 5 {"b":"5"} \N \N +6 {"b":{"c":"6"}} \N {} 6 \N +7 {"b":{"c":"7"}} \N {} 7 \N +8 {"b":{"c":"8"}} \N {} 8 \N +9 {"c":{"d":"9"}} \N {} \N 9 +10 {"c":{"d":"10"}} \N {} \N 10 +11 {"c":{"d":"11"}} \N {} \N 11 +12 {} \N {} \N \N +13 {} \N {} \N \N +14 {} \N {} \N \N diff --git a/tests/queries/0_stateless/03210_json_type_alter_add_column.sql.j2 b/tests/queries/0_stateless/03210_json_type_alter_add_column.sql.j2 new file mode 100644 index 00000000000..add57928804 --- /dev/null +++ b/tests/queries/0_stateless/03210_json_type_alter_add_column.sql.j2 @@ -0,0 +1,34 @@ +-- Tags: no-fasttest, long + +set allow_experimental_dynamic_type = 1; +set allow_experimental_variant_type = 1; +set allow_experimental_json_type = 1; +set use_variant_as_common_type = 1; + +drop table if exists test; + +{% for create_command in ['create table test (x UInt64) engine=Memory;', + 'create table test (x UInt64) engine=MergeTree order by x settings min_rows_for_wide_part=100000000, min_bytes_for_wide_part=1000000000;', + 'create table test (x UInt64) engine=MergeTree order by x settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;'] -%} + +{{ create_command }} + +select 'initial insert'; +insert into test select number from numbers(3); + +select 'alter add column 1'; +alter table test add column json JSON settings mutations_sync=1; +select count(), arrayJoin(JSONAllPaths(json)) as path from test group by path order by count() desc, path; +select x, json, json.a.b, json.^a, json.b.c.:Int64, json.c.d from test order by x; + +select 'insert after alter add column'; +insert into test select number, toJSONString(map('a.b', number::UInt32)) from numbers(3, 3); +insert into test select number, toJSONString(map('b.c', number::UInt32)) from numbers(6, 3); +insert into test select number, toJSONString(map('c.d', number::UInt32)) from numbers(9, 3); +insert into test select number, '{}' from numbers(12, 3); +select count(), arrayJoin(JSONAllPaths(json)) as path from test group by path order by count() desc, path; +select x, json, json.a.b, json.^a, json.b.c.:Int64, json.c.d from test order by x; + +drop table test; + +{% endfor -%} diff --git a/tests/queries/0_stateless/03210_lag_lead_inframe_types.reference b/tests/queries/0_stateless/03210_lag_lead_inframe_types.reference index d4734a85e72..4ecf7f56b07 100644 --- a/tests/queries/0_stateless/03210_lag_lead_inframe_types.reference +++ b/tests/queries/0_stateless/03210_lag_lead_inframe_types.reference @@ -38,3 +38,19 @@ 7 8 9 +15 \N 3 15 15 15 15 +14 \N 2 10 10 10 154 +13 \N 2 10 10 10 143 +12 \N 2 10 10 10 14 +11 \N 2 10 10 10 12 +10 \N 2 10 10 10 10 +9 \N 1 5 5 5 99 +8 \N 1 5 5 5 88 +7 \N 1 5 5 5 9 +6 \N 1 5 5 5 7 +5 \N 1 5 5 5 5 +4 \N 0 0 0 0 44 +3 \N 0 0 0 0 33 +2 \N 0 0 0 0 4 +1 \N 0 0 0 0 2 +0 \N 0 0 0 0 0 diff --git a/tests/queries/0_stateless/03210_lag_lead_inframe_types.sql b/tests/queries/0_stateless/03210_lag_lead_inframe_types.sql index f6017ee6690..cc6746e428f 100644 --- a/tests/queries/0_stateless/03210_lag_lead_inframe_types.sql +++ b/tests/queries/0_stateless/03210_lag_lead_inframe_types.sql @@ -2,3 +2,23 @@ SELECT lagInFrame(2::UInt128, 2, number) OVER w FROM numbers(10) WINDOW w AS (OR SELECT leadInFrame(2::UInt128, 2, number) OVER w FROM numbers(10) WINDOW w AS (ORDER BY number); SELECT lagInFrame(2::UInt64, 2, number) OVER w FROM numbers(10) WINDOW w AS (ORDER BY number); SELECT leadInFrame(2::UInt64, 2, number) OVER w FROM numbers(10) WINDOW w AS (ORDER BY number); + +SELECT + number, + YYYYMMDDToDate(1, toLowCardinality(11), max(YYYYMMDDToDate(YYYYMMDDToDate(toLowCardinality(1), 11, materialize(NULL), 19700101.1, 1, 27, 7, materialize(toUInt256(37)), 9, 19, 9), 1, toUInt128(11), NULL, 19700101.1, 1, 27, 7, 37, 9, 19, 9), toUInt256(30)) IGNORE NULLS OVER w, NULL, 19700101.1, toNullable(1), 27, materialize(7), 37, 9, 19, 9), + p, + pp, + lagInFrame(number, number - pp) OVER w AS lag2, + lagInFrame(number, number - pp, number * 11) OVER w AS lag, + leadInFrame(number, number - pp, number * 11) OVER w AS lead +FROM +( + SELECT + number, + intDiv(number, 5) AS p, + p * 5 AS pp + FROM numbers(16) +) +WHERE toLowCardinality(1) +WINDOW w AS (PARTITION BY p ORDER BY number ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) +ORDER BY number DESC NULLS LAST; diff --git a/tests/queries/0_stateless/03211_nested_json_merges.reference.j2 b/tests/queries/0_stateless/03211_nested_json_merges.reference.j2 new file mode 100644 index 00000000000..9b6ed82abed --- /dev/null +++ b/tests/queries/0_stateless/03211_nested_json_merges.reference.j2 @@ -0,0 +1,136 @@ +create table test (id UInt64, json JSON(max_dynamic_paths=4)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000; +Dynamic paths +300000 c +150000 d +Shared data paths +Dynamic paths +300000 c +Shared data paths +150000 d +Dynamic paths +600000 f +300000 c +150000 e +Shared data paths +150000 d +Dynamic paths +600000 f +Shared data paths +300000 c +150000 d +150000 e +Dynamic paths +600000 f +450000 c +Shared data paths +300000 c +150000 d +150000 e +Dynamic paths +750000 c +Shared data paths +600000 f +150000 d +150000 e +create table test (id UInt64, json JSON(max_dynamic_paths=4)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1; +Dynamic paths +300000 c +150000 d +Shared data paths +Dynamic paths +300000 c +Shared data paths +150000 d +Dynamic paths +600000 f +300000 c +150000 e +Shared data paths +150000 d +Dynamic paths +600000 f +Shared data paths +300000 c +150000 d +150000 e +Dynamic paths +600000 f +450000 c +Shared data paths +300000 c +150000 d +150000 e +Dynamic paths +750000 c +Shared data paths +600000 f +150000 d +150000 e +create table test (id UInt64, json JSON(max_dynamic_paths=4)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1; +Dynamic paths +300000 c +150000 d +Shared data paths +Dynamic paths +300000 c +Shared data paths +150000 d +Dynamic paths +600000 f +300000 c +150000 e +Shared data paths +150000 d +Dynamic paths +600000 f +Shared data paths +300000 c +150000 d +150000 e +Dynamic paths +600000 f +450000 c +Shared data paths +300000 c +150000 d +150000 e +Dynamic paths +750000 c +Shared data paths +600000 f +150000 d +150000 e +create table test (id UInt64, json JSON(max_dynamic_paths=4)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1; +Dynamic paths +300000 c +150000 d +Shared data paths +Dynamic paths +300000 c +Shared data paths +150000 d +Dynamic paths +600000 f +300000 c +150000 e +Shared data paths +150000 d +Dynamic paths +600000 f +Shared data paths +300000 c +150000 d +150000 e +Dynamic paths +600000 f +450000 c +Shared data paths +300000 c +150000 d +150000 e +Dynamic paths +750000 c +Shared data paths +600000 f +150000 d +150000 e diff --git a/tests/queries/0_stateless/03211_nested_json_merges.sql.j2 b/tests/queries/0_stateless/03211_nested_json_merges.sql.j2 new file mode 100644 index 00000000000..0af998e22bb --- /dev/null +++ b/tests/queries/0_stateless/03211_nested_json_merges.sql.j2 @@ -0,0 +1,63 @@ +-- Tags: no-fasttest, long, no-tsan, no-asan, no-msan, no-ubsan + +set allow_experimental_json_type = 1; + +drop table if exists test; + +{% for create_command in ['create table test (id UInt64, json JSON(max_dynamic_paths=4)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;', + 'create table test (id UInt64, json JSON(max_dynamic_paths=4)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;', + 'create table test (id UInt64, json JSON(max_dynamic_paths=4)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;', + 'create table test (id UInt64, json JSON(max_dynamic_paths=4)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;'] -%} + +select '{{ create_command }}'; + +{{ create_command }} + +system stop merges test; +insert into test select number, toJSONString(map('a', number)) from numbers(100000); +insert into test select number, toJSONString(map('b', arrayMap(x -> map('c', x), range(number % 5 + 1)))) from numbers(100000); +insert into test select number, toJSONString(map('b', arrayMap(x -> map('d', x), range(number % 5 + 1)))) from numbers(50000); + +select 'Dynamic paths'; +select count(), arrayJoin(JSONDynamicPaths(arrayJoin(json.b[]))) as path from test group by path order by count() desc, path; +select 'Shared data paths'; +select count(), arrayJoin(JSONSharedDataPaths(arrayJoin(json.b[]))) as path from test group by path order by count() desc, path; +system start merges test; +optimize table test final; +select 'Dynamic paths'; +select count(), arrayJoin(JSONDynamicPaths(arrayJoin(json.b[]))) as path from test group by path order by count() desc, path; +select 'Shared data paths'; +select count(), arrayJoin(JSONSharedDataPaths(arrayJoin(json.b[]))) as path from test group by path order by count() desc, path; + +system stop merges test; +insert into test select number, toJSONString(map('b', arrayMap(x -> map('e', x), range(number % 5 + 1)))) from numbers(50000); +insert into test select number, toJSONString(map('b', arrayMap(x -> map('f', x), range(number % 5 + 1)))) from numbers(200000); + +select 'Dynamic paths'; +select count(), arrayJoin(JSONDynamicPaths(arrayJoin(json.b[]))) as path from test group by path order by count() desc, path; +select 'Shared data paths'; +select count(), arrayJoin(JSONSharedDataPaths(arrayJoin(json.b[]))) as path from test group by path order by count() desc, path; +system start merges test; +optimize table test final; +select 'Dynamic paths'; +select count(), arrayJoin(JSONDynamicPaths(arrayJoin(json.b[]))) as path from test group by path order by count() desc, path; +select 'Shared data paths'; +select count(), arrayJoin(JSONSharedDataPaths(arrayJoin(json.b[]))) as path from test group by path order by count() desc, path; + +system stop merges test; +insert into test select number, toJSONString(map('b', arrayMap(x -> map('c', x), range(number % 5 + 1)))) from numbers(150000); + +select 'Dynamic paths'; +select count(), arrayJoin(JSONDynamicPaths(arrayJoin(json.b[]))) as path from test group by path order by count() desc, path; +select 'Shared data paths'; +select count(), arrayJoin(JSONSharedDataPaths(arrayJoin(json.b[]))) as path from test group by path order by count() desc, path; +system start merges test; +optimize table test final; +select 'Dynamic paths'; +select count(), arrayJoin(JSONDynamicPaths(arrayJoin(json.b[]))) as path from test group by path order by count() desc, path; +select 'Shared data paths'; +select count(), arrayJoin(JSONSharedDataPaths(arrayJoin(json.b[]))) as path from test group by path order by count() desc, path; + +drop table test; + +{% endfor -%} diff --git a/tests/queries/0_stateless/03211_nested_json_merges_small.reference.j2 b/tests/queries/0_stateless/03211_nested_json_merges_small.reference.j2 new file mode 100644 index 00000000000..76339dba3e3 --- /dev/null +++ b/tests/queries/0_stateless/03211_nested_json_merges_small.reference.j2 @@ -0,0 +1,136 @@ +create table test (id UInt64, json JSON(max_dynamic_paths=4)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000; +Dynamic paths +30 c +15 d +Shared data paths +Dynamic paths +30 c +Shared data paths +15 d +Dynamic paths +60 f +30 c +15 e +Shared data paths +15 d +Dynamic paths +60 f +Shared data paths +30 c +15 d +15 e +Dynamic paths +60 f +45 c +Shared data paths +30 c +15 d +15 e +Dynamic paths +75 c +Shared data paths +60 f +15 d +15 e +create table test (id UInt64, json JSON(max_dynamic_paths=4)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1; +Dynamic paths +30 c +15 d +Shared data paths +Dynamic paths +30 c +Shared data paths +15 d +Dynamic paths +60 f +30 c +15 e +Shared data paths +15 d +Dynamic paths +60 f +Shared data paths +30 c +15 d +15 e +Dynamic paths +60 f +45 c +Shared data paths +30 c +15 d +15 e +Dynamic paths +75 c +Shared data paths +60 f +15 d +15 e +create table test (id UInt64, json JSON(max_dynamic_paths=4)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1; +Dynamic paths +30 c +15 d +Shared data paths +Dynamic paths +30 c +Shared data paths +15 d +Dynamic paths +60 f +30 c +15 e +Shared data paths +15 d +Dynamic paths +60 f +Shared data paths +30 c +15 d +15 e +Dynamic paths +60 f +45 c +Shared data paths +30 c +15 d +15 e +Dynamic paths +75 c +Shared data paths +60 f +15 d +15 e +create table test (id UInt64, json JSON(max_dynamic_paths=4)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1; +Dynamic paths +30 c +15 d +Shared data paths +Dynamic paths +30 c +Shared data paths +15 d +Dynamic paths +60 f +30 c +15 e +Shared data paths +15 d +Dynamic paths +60 f +Shared data paths +30 c +15 d +15 e +Dynamic paths +60 f +45 c +Shared data paths +30 c +15 d +15 e +Dynamic paths +75 c +Shared data paths +60 f +15 d +15 e diff --git a/tests/queries/0_stateless/03211_nested_json_merges_small.sql.j2 b/tests/queries/0_stateless/03211_nested_json_merges_small.sql.j2 new file mode 100644 index 00000000000..86e5a6c71c9 --- /dev/null +++ b/tests/queries/0_stateless/03211_nested_json_merges_small.sql.j2 @@ -0,0 +1,63 @@ +-- Tags: no-fasttest, long, no-tsan, no-asan, no-msan, no-ubsan + +set allow_experimental_json_type = 1; + +drop table if exists test; + +{% for create_command in ['create table test (id UInt64, json JSON(max_dynamic_paths=4)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;', + 'create table test (id UInt64, json JSON(max_dynamic_paths=4)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;', + 'create table test (id UInt64, json JSON(max_dynamic_paths=4)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;', + 'create table test (id UInt64, json JSON(max_dynamic_paths=4)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;'] -%} + +select '{{ create_command }}'; + +{{ create_command }} + +system stop merges test; +insert into test select number, toJSONString(map('a', number)) from numbers(10); +insert into test select number, toJSONString(map('b', arrayMap(x -> map('c', x), range(number % 5 + 1)))) from numbers(10); +insert into test select number, toJSONString(map('b', arrayMap(x -> map('d', x), range(number % 5 + 1)))) from numbers(5); + +select 'Dynamic paths'; +select count(), arrayJoin(JSONDynamicPaths(arrayJoin(json.b[]))) as path from test group by path order by count() desc, path; +select 'Shared data paths'; +select count(), arrayJoin(JSONSharedDataPaths(arrayJoin(json.b[]))) as path from test group by path order by count() desc, path; +system start merges test; +optimize table test final; +select 'Dynamic paths'; +select count(), arrayJoin(JSONDynamicPaths(arrayJoin(json.b[]))) as path from test group by path order by count() desc, path; +select 'Shared data paths'; +select count(), arrayJoin(JSONSharedDataPaths(arrayJoin(json.b[]))) as path from test group by path order by count() desc, path; + +system stop merges test; +insert into test select number, toJSONString(map('b', arrayMap(x -> map('e', x), range(number % 5 + 1)))) from numbers(5); +insert into test select number, toJSONString(map('b', arrayMap(x -> map('f', x), range(number % 5 + 1)))) from numbers(20); + +select 'Dynamic paths'; +select count(), arrayJoin(JSONDynamicPaths(arrayJoin(json.b[]))) as path from test group by path order by count() desc, path; +select 'Shared data paths'; +select count(), arrayJoin(JSONSharedDataPaths(arrayJoin(json.b[]))) as path from test group by path order by count() desc, path; +system start merges test; +optimize table test final; +select 'Dynamic paths'; +select count(), arrayJoin(JSONDynamicPaths(arrayJoin(json.b[]))) as path from test group by path order by count() desc, path; +select 'Shared data paths'; +select count(), arrayJoin(JSONSharedDataPaths(arrayJoin(json.b[]))) as path from test group by path order by count() desc, path; + +system stop merges test; +insert into test select number, toJSONString(map('b', arrayMap(x -> map('c', x), range(number % 5 + 1)))) from numbers(15); + +select 'Dynamic paths'; +select count(), arrayJoin(JSONDynamicPaths(arrayJoin(json.b[]))) as path from test group by path order by count() desc, path; +select 'Shared data paths'; +select count(), arrayJoin(JSONSharedDataPaths(arrayJoin(json.b[]))) as path from test group by path order by count() desc, path; +system start merges test; +optimize table test final; +select 'Dynamic paths'; +select count(), arrayJoin(JSONDynamicPaths(arrayJoin(json.b[]))) as path from test group by path order by count() desc, path; +select 'Shared data paths'; +select count(), arrayJoin(JSONSharedDataPaths(arrayJoin(json.b[]))) as path from test group by path order by count() desc, path; + +drop table test; + +{% endfor -%} diff --git a/tests/queries/0_stateless/03212_max_bytes_to_read_for_schema_inference_in_cache.reference b/tests/queries/0_stateless/03212_max_bytes_to_read_for_schema_inference_in_cache.reference index cd109daac52..13b1138d1c4 100644 --- a/tests/queries/0_stateless/03212_max_bytes_to_read_for_schema_inference_in_cache.reference +++ b/tests/queries/0_stateless/03212_max_bytes_to_read_for_schema_inference_in_cache.reference @@ -1,2 +1,2 @@ x Nullable(Int64) -schema_inference_hints=, max_rows_to_read_for_schema_inference=25000, max_bytes_to_read_for_schema_inference=1000, schema_inference_make_columns_nullable=true, try_infer_integers=true, try_infer_dates=true, try_infer_datetimes=true, try_infer_numbers_from_strings=false, read_bools_as_numbers=true, read_bools_as_strings=true, read_objects_as_strings=true, read_numbers_as_strings=true, read_arrays_as_strings=true, try_infer_objects_as_tuples=true, infer_incomplete_types_as_strings=true, try_infer_objects=false, use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects=false +schema_inference_hints=, max_rows_to_read_for_schema_inference=25000, max_bytes_to_read_for_schema_inference=1000, schema_inference_make_columns_nullable=true, try_infer_integers=true, try_infer_dates=true, try_infer_datetimes=true, try_infer_datetimes_only_datetime64=false, try_infer_numbers_from_strings=false, read_bools_as_numbers=true, read_bools_as_strings=true, read_objects_as_strings=true, read_numbers_as_strings=true, read_arrays_as_strings=true, try_infer_objects_as_tuples=true, infer_incomplete_types_as_strings=true, try_infer_objects=false, use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects=false diff --git a/tests/queries/0_stateless/03212_max_bytes_to_read_for_schema_inference_in_cache.sh b/tests/queries/0_stateless/03212_max_bytes_to_read_for_schema_inference_in_cache.sh index 8a77538f592..adbb0cb6de0 100755 --- a/tests/queries/0_stateless/03212_max_bytes_to_read_for_schema_inference_in_cache.sh +++ b/tests/queries/0_stateless/03212_max_bytes_to_read_for_schema_inference_in_cache.sh @@ -5,7 +5,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh echo '{"x" : 42}' > $CLICKHOUSE_TEST_UNIQUE_NAME.json -$CLICKHOUSE_LOCAL -nm -q " +$CLICKHOUSE_LOCAL -m -q " DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.json') SETTINGS input_format_max_bytes_to_read_for_schema_inference=1000; SELECT additional_format_info from system.schema_inference_cache" diff --git a/tests/queries/0_stateless/03214_json_typed_dynamic_path.reference b/tests/queries/0_stateless/03214_json_typed_dynamic_path.reference new file mode 100644 index 00000000000..1b3e6b7a8db --- /dev/null +++ b/tests/queries/0_stateless/03214_json_typed_dynamic_path.reference @@ -0,0 +1,4 @@ +{"a":"42"} +{"a":["1","2","3"]} +{"a":"42"} +{"a":["1","2","3"]} diff --git a/tests/queries/0_stateless/03214_json_typed_dynamic_path.sql b/tests/queries/0_stateless/03214_json_typed_dynamic_path.sql new file mode 100644 index 00000000000..1f6a025825a --- /dev/null +++ b/tests/queries/0_stateless/03214_json_typed_dynamic_path.sql @@ -0,0 +1,17 @@ +-- Tags: no-fasttest + +set allow_experimental_json_type = 1; +drop table if exists test; +create table test (json JSON(a Dynamic)) engine=MergeTree order by tuple() settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1; +insert into test select '{"a" : 42}'; +insert into test select '{"a" : [1, 2, 3]}'; +optimize table test; +select * from test order by toString(json); +drop table test; + +create table test (json JSON(a Dynamic)) engine=MergeTree order by tuple() settings min_rows_for_wide_part=10000000, min_bytes_for_wide_part=10000000; +insert into test select '{"a" : 42}'; +insert into test select '{"a" : [1, 2, 3]}'; +optimize table test; +select * from test order by toString(json); +drop table test; diff --git a/tests/queries/0_stateless/03215_grant_current_grants.reference b/tests/queries/0_stateless/03215_grant_current_grants.reference new file mode 100644 index 00000000000..e4f6850b806 --- /dev/null +++ b/tests/queries/0_stateless/03215_grant_current_grants.reference @@ -0,0 +1,2 @@ +GRANT SELECT, CREATE TABLE, CREATE VIEW ON default.* +GRANT SELECT ON default.* diff --git a/tests/queries/0_stateless/03215_grant_current_grants.sh b/tests/queries/0_stateless/03215_grant_current_grants.sh new file mode 100755 index 00000000000..68af4a62bba --- /dev/null +++ b/tests/queries/0_stateless/03215_grant_current_grants.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +user1="user03215_1_${CLICKHOUSE_DATABASE}_$RANDOM" +user2="user03215_2_${CLICKHOUSE_DATABASE}_$RANDOM" +user3="user03215_3_${CLICKHOUSE_DATABASE}_$RANDOM" +db=${CLICKHOUSE_DATABASE} + + +${CLICKHOUSE_CLIENT} --query "CREATE USER $user1, $user2, $user3;"; +${CLICKHOUSE_CLIENT} --query "GRANT SELECT, CREATE TABLE, CREATE VIEW ON $db.* TO $user1 WITH GRANT OPTION;"; + +${CLICKHOUSE_CLIENT} --query "GRANT CURRENT GRANTS ON $db.* TO $user2" --user $user1; +${CLICKHOUSE_CLIENT} --query "GRANT CURRENT GRANTS ON $db.* TO $user3" --user $user2; + +${CLICKHOUSE_CLIENT} --query "SHOW GRANTS FOR $user2" | sed 's/ TO.*//'; +${CLICKHOUSE_CLIENT} --query "SHOW GRANTS FOR $user3" | sed 's/ TO.*//'; + +${CLICKHOUSE_CLIENT} --query "GRANT CURRENT GRANTS(SELECT ON $db.*) TO $user3" --user $user1; +${CLICKHOUSE_CLIENT} --query "SHOW GRANTS FOR $user3" | sed 's/ TO.*//'; + +${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS $user1, $user2, $user3"; diff --git a/tests/queries/0_stateless/03215_multilinestring_geometry.reference b/tests/queries/0_stateless/03215_multilinestring_geometry.reference new file mode 100644 index 00000000000..9702dd6d6f8 --- /dev/null +++ b/tests/queries/0_stateless/03215_multilinestring_geometry.reference @@ -0,0 +1,30 @@ +-- { echoOn } +SELECT readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1))'); +[[(1,1),(2,2),(3,3),(1,1)]] +SELECT toTypeName(readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1))')); +MultiLineString +SELECT wkt(readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1))')); +MULTILINESTRING((1 1,2 2,3 3,1 1)) +SELECT readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1), (1 0, 2 0, 3 0))'); +[[(1,1),(2,2),(3,3),(1,1)],[(1,0),(2,0),(3,0)]] +SELECT toTypeName(readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1), (1 0, 2 0, 3 0))')); +MultiLineString +SELECT wkt(readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1), (1 0, 2 0, 3 0))')); +MULTILINESTRING((1 1,2 2,3 3,1 1),(1 0,2 0,3 0)) +-- Native Array(Array(Tuple(Float64, Float64))) is treated as Polygon, not as MultiLineString. +WITH wkt(CAST([[(1, 1), (2, 2), (3, 3), (1, 1)]], 'Array(Array(Tuple(Float64, Float64)))')) as x +SELECT x, toTypeName(x), readWKTPolygon(x) as y, toTypeName(y); +POLYGON((1 1,2 2,3 3,1 1)) String [[(1,1),(2,2),(3,3),(1,1)]] Polygon +-- Non constant tests + +DROP TABLE IF EXISTS t; +CREATE TABLE IF NOT EXISTS t (shape Array(Array(Tuple(Float64, Float64))), wkt_string String, ord Float64) Engine = Memory; +INSERT INTO t (ord, shape, wkt_string) VALUES (1, [[(1, 1), (2, 2), (3, 3), (1, 1)]], 'MULTILINESTRING ((1 1, 2 2, 3 3, 1 1))'); +INSERT INTO t (ord, shape, wkt_string) VALUES (2, [[(1, 1), (2, 2), (3, 3), (1, 1)], [(1, 0), (2, 0), (3, 0)]], 'MULTILINESTRING ((1 1, 2 2, 3 3, 1 1), (1 0, 2 0, 3 0))'); +INSERT INTO t (ord, shape, wkt_string) VALUES (3, [[(1, 0), (2, 1), (3, 0), (4, 1), (5, 0), (6, 1), (7, 0), (8, 1), (9, 0), (10, 1)]], 'MULTILINESTRING ((1 0, 2 1, 3 0, 4 1, 5 0, 6 1, 7 0, 8 1, 9 0, 10 1))'); +-- Native Array(Array(Tuple(Float64, Float64))) is treated as Polygon, not as MultiLineString. +-- but reading MultiLineString should still return an Array(Array(Tuple(Float64, Float64))) +select wkt(shape), readWKTMultiLineString(wkt_string), readWKTMultiLineString(wkt_string) = shape from t order by ord; +POLYGON((1 1,2 2,3 3,1 1)) [[(1,1),(2,2),(3,3),(1,1)]] 1 +POLYGON((1 1,2 2,3 3,1 1),(1 0,2 0,3 0,1 0)) [[(1,1),(2,2),(3,3),(1,1)],[(1,0),(2,0),(3,0)]] 1 +POLYGON((1 0,2 1,3 0,4 1,5 0,6 1,7 0,8 1,9 0,10 1,1 0)) [[(1,0),(2,1),(3,0),(4,1),(5,0),(6,1),(7,0),(8,1),(9,0),(10,1)]] 1 diff --git a/tests/queries/0_stateless/03215_multilinestring_geometry.sql b/tests/queries/0_stateless/03215_multilinestring_geometry.sql new file mode 100644 index 00000000000..cf4ef15f63d --- /dev/null +++ b/tests/queries/0_stateless/03215_multilinestring_geometry.sql @@ -0,0 +1,26 @@ +-- { echoOn } +SELECT readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1))'); +SELECT toTypeName(readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1))')); +SELECT wkt(readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1))')); + +SELECT readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1), (1 0, 2 0, 3 0))'); +SELECT toTypeName(readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1), (1 0, 2 0, 3 0))')); +SELECT wkt(readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1), (1 0, 2 0, 3 0))')); + +-- Native Array(Array(Tuple(Float64, Float64))) is treated as Polygon, not as MultiLineString. +WITH wkt(CAST([[(1, 1), (2, 2), (3, 3), (1, 1)]], 'Array(Array(Tuple(Float64, Float64)))')) as x +SELECT x, toTypeName(x), readWKTPolygon(x) as y, toTypeName(y); + +-- Non constant tests + +DROP TABLE IF EXISTS t; +CREATE TABLE IF NOT EXISTS t (shape Array(Array(Tuple(Float64, Float64))), wkt_string String, ord Float64) Engine = Memory; +INSERT INTO t (ord, shape, wkt_string) VALUES (1, [[(1, 1), (2, 2), (3, 3), (1, 1)]], 'MULTILINESTRING ((1 1, 2 2, 3 3, 1 1))'); +INSERT INTO t (ord, shape, wkt_string) VALUES (2, [[(1, 1), (2, 2), (3, 3), (1, 1)], [(1, 0), (2, 0), (3, 0)]], 'MULTILINESTRING ((1 1, 2 2, 3 3, 1 1), (1 0, 2 0, 3 0))'); +INSERT INTO t (ord, shape, wkt_string) VALUES (3, [[(1, 0), (2, 1), (3, 0), (4, 1), (5, 0), (6, 1), (7, 0), (8, 1), (9, 0), (10, 1)]], 'MULTILINESTRING ((1 0, 2 1, 3 0, 4 1, 5 0, 6 1, 7 0, 8 1, 9 0, 10 1))'); + +-- Native Array(Array(Tuple(Float64, Float64))) is treated as Polygon, not as MultiLineString. +-- but reading MultiLineString should still return an Array(Array(Tuple(Float64, Float64))) +select wkt(shape), readWKTMultiLineString(wkt_string), readWKTMultiLineString(wkt_string) = shape from t order by ord; + + diff --git a/tests/queries/0_stateless/03215_view_with_recursive.reference b/tests/queries/0_stateless/03215_view_with_recursive.reference new file mode 100644 index 00000000000..c3ca8065a70 --- /dev/null +++ b/tests/queries/0_stateless/03215_view_with_recursive.reference @@ -0,0 +1,2 @@ +5050 +8 diff --git a/tests/queries/0_stateless/03215_view_with_recursive.sql b/tests/queries/0_stateless/03215_view_with_recursive.sql new file mode 100644 index 00000000000..5d93ccc5438 --- /dev/null +++ b/tests/queries/0_stateless/03215_view_with_recursive.sql @@ -0,0 +1,43 @@ +SET allow_experimental_analyzer = 1; + +CREATE VIEW 03215_test_v +AS WITH RECURSIVE test_table AS + ( + SELECT 1 AS number + UNION ALL + SELECT number + 1 + FROM test_table + WHERE number < 100 + ) +SELECT sum(number) +FROM test_table; + +SELECT * FROM 03215_test_v; + +CREATE VIEW 03215_multi_v +AS WITH RECURSIVE + task AS + ( + SELECT + number AS task_id, + number - 1 AS parent_id + FROM numbers(10) + ), + rtq AS + ( + SELECT + task_id, + parent_id + FROM task AS t + WHERE t.parent_id = 1 + UNION ALL + SELECT + t.task_id, + t.parent_id + FROM task AS t, rtq AS r + WHERE t.parent_id = r.task_id + ) +SELECT count() +FROM rtq; + +SELECT * FROM 03215_multi_v; diff --git a/tests/queries/0_stateless/03217_filtering_in_storage_merge.reference b/tests/queries/0_stateless/03217_filtering_in_storage_merge.reference new file mode 100644 index 00000000000..d366ad04c39 --- /dev/null +++ b/tests/queries/0_stateless/03217_filtering_in_storage_merge.reference @@ -0,0 +1,6 @@ +Expression ((Project names + Projection)) + Aggregating + Expression (Before GROUP BY) + ReadFromMerge + Filter (( + ( + ))) + ReadFromMergeTree (default.test_03217_merge_replica_1) diff --git a/tests/queries/0_stateless/03217_filtering_in_storage_merge.sql b/tests/queries/0_stateless/03217_filtering_in_storage_merge.sql new file mode 100644 index 00000000000..42d31e95f9c --- /dev/null +++ b/tests/queries/0_stateless/03217_filtering_in_storage_merge.sql @@ -0,0 +1,16 @@ +CREATE TABLE test_03217_merge_replica_1(x UInt32) + ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_03217_merge_replica', 'r1') + ORDER BY x; +CREATE TABLE test_03217_merge_replica_2(x UInt32) + ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_03217_merge_replica', 'r2') + ORDER BY x; + + +CREATE TABLE test_03217_all_replicas (x UInt32) + ENGINE = Merge(currentDatabase(), 'test_03217_merge_replica_*'); + +INSERT INTO test_03217_merge_replica_1 SELECT number AS x FROM numbers(10); +SYSTEM SYNC REPLICA test_03217_merge_replica_2; + +-- If the filter on _table is not applied, then the plan will show both replicas +EXPLAIN SELECT _table, count() FROM test_03217_all_replicas WHERE _table = 'test_03217_merge_replica_1' AND x >= 0 GROUP BY _table SETTINGS allow_experimental_analyzer=1; diff --git a/tests/queries/0_stateless/03217_filtering_in_system_tables.reference b/tests/queries/0_stateless/03217_filtering_in_system_tables.reference new file mode 100644 index 00000000000..c0761c3f689 --- /dev/null +++ b/tests/queries/0_stateless/03217_filtering_in_system_tables.reference @@ -0,0 +1,6 @@ +information_schema tables +both default test_03217_system_tables_replica_1 r1 +both default test_03217_system_tables_replica_2 r2 +default test_03217_system_tables_replica_1 r1 +1 +1 diff --git a/tests/queries/0_stateless/03217_filtering_in_system_tables.sql b/tests/queries/0_stateless/03217_filtering_in_system_tables.sql new file mode 100644 index 00000000000..eb506dfe39a --- /dev/null +++ b/tests/queries/0_stateless/03217_filtering_in_system_tables.sql @@ -0,0 +1,30 @@ +-- If filtering is not done correctly on databases, then this query report to read 3 rows, which are: `system.tables`, `information_schema.tables` and `INFORMATION_SCHEMA.tables` +SELECT database, table FROM system.tables WHERE database = 'information_schema' AND table = 'tables'; + +CREATE TABLE test_03217_system_tables_replica_1(x UInt32) + ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_03217_system_tables_replica', 'r1') + ORDER BY x; +CREATE TABLE test_03217_system_tables_replica_2(x UInt32) + ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_03217_system_tables_replica', 'r2') + ORDER BY x; + +-- Make sure we can read both replicas +-- The replica name might be altered because of `_functional_tests_helper_database_replicated_replace_args_macros`, +-- thus we need to use `left` +SELECT 'both', database, table, left(replica_name, 2) FROM system.replicas WHERE database = currentDatabase(); +-- If filtering is not done correctly on database-table column, then this query report to read 2 rows, which are the above tables +SELECT database, table, left(replica_name, 2) FROM system.replicas WHERE database = currentDatabase() AND table = 'test_03217_system_tables_replica_1' AND replica_name LIKE 'r1%'; +SYSTEM FLUSH LOGS; +-- argMax is necessary to make the test repeatable + +-- StorageSystemTables +SELECT argMax(read_rows, event_time_microseconds) FROM system.query_log WHERE 1 + AND current_database = currentDatabase() + AND query LIKE '%SELECT database, table FROM system.tables WHERE database = \'information_schema\' AND table = \'tables\';' + AND type = 'QueryFinish'; + +-- StorageSystemReplicas +SELECT argMax(read_rows, event_time_microseconds) FROM system.query_log WHERE 1 + AND current_database = currentDatabase() + AND query LIKE '%SELECT database, table, left(replica_name, 2) FROM system.replicas WHERE database = currentDatabase() AND table = \'test_03217_system_tables_replica_1\' AND replica_name LIKE \'r1\%\';' + AND type = 'QueryFinish'; diff --git a/tests/queries/0_stateless/03218_materialize_msan.reference b/tests/queries/0_stateless/03218_materialize_msan.reference new file mode 100644 index 00000000000..eb84f35f9f4 --- /dev/null +++ b/tests/queries/0_stateless/03218_materialize_msan.reference @@ -0,0 +1 @@ +[(NULL,'11\01111111\011111','1111')] -2147483648 \N diff --git a/tests/queries/0_stateless/03218_materialize_msan.sql b/tests/queries/0_stateless/03218_materialize_msan.sql new file mode 100644 index 00000000000..7e7043e687b --- /dev/null +++ b/tests/queries/0_stateless/03218_materialize_msan.sql @@ -0,0 +1,23 @@ +SET enable_analyzer = 1; + +SELECT + materialize([(NULL, '11\01111111\011111', '1111')]) AS t, + (t[1048576]).2, + materialize(-2147483648), + (t[-2147483648]).1 +GROUP BY + materialize([(NULL, '1')]), + '', + (materialize((t[1023]).2), (materialize(''), (t[2147483647]).1, materialize(9223372036854775807)), (materialize(''), materialize(NULL, 2147483647, t[65535], 256)), materialize(NULL)) +; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} + +SELECT + materialize([(NULL, '11\01111111\011111', '1111')]) AS t, + (t[1048576]).2, + materialize(-2147483648), + (t[-2147483648]).1 +GROUP BY + materialize([(NULL, '1')]), + '', + (materialize((t[1023]).2), (materialize(''), (t[2147483647]).1, materialize(9223372036854775807)), (materialize(''), materialize(NULL), materialize(2147483647), materialize(t[65535]), materialize(256)), materialize(NULL)) +; diff --git a/tests/queries/0_stateless/03221_create_if_not_exists_setting.reference b/tests/queries/0_stateless/03221_create_if_not_exists_setting.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03221_create_if_not_exists_setting.sql b/tests/queries/0_stateless/03221_create_if_not_exists_setting.sql new file mode 100644 index 00000000000..18b3ed7bcec --- /dev/null +++ b/tests/queries/0_stateless/03221_create_if_not_exists_setting.sql @@ -0,0 +1,24 @@ +-- Tags: no-parallel + +SET create_if_not_exists=0; -- Default + +DROP TABLE IF EXISTS example_table; +CREATE TABLE example_table (id UInt32) ENGINE=MergeTree() ORDER BY id; +CREATE TABLE example_table (id UInt32) ENGINE=MergeTree() ORDER BY id; -- { serverError TABLE_ALREADY_EXISTS } + +DROP DATABASE IF EXISTS example_database; +CREATE DATABASE example_database; +CREATE DATABASE example_database; -- { serverError DATABASE_ALREADY_EXISTS } + +SET create_if_not_exists=1; + +DROP TABLE IF EXISTS example_table; +CREATE TABLE example_table (id UInt32) ENGINE=MergeTree() ORDER BY id; +CREATE TABLE example_table (id UInt32) ENGINE=MergeTree() ORDER BY id; + +DROP DATABASE IF EXISTS example_database; +CREATE DATABASE example_database; +CREATE DATABASE example_database; + +DROP DATABASE IF EXISTS example_database; +DROP TABLE IF EXISTS example_table; \ No newline at end of file diff --git a/tests/queries/0_stateless/03221_insert_timeout_overflow_mode.reference b/tests/queries/0_stateless/03221_insert_timeout_overflow_mode.reference new file mode 100644 index 00000000000..68538c3f75b --- /dev/null +++ b/tests/queries/0_stateless/03221_insert_timeout_overflow_mode.reference @@ -0,0 +1,2 @@ +QUERY_WAS_CANCELLED +QUERY_WAS_CANCELLED diff --git a/tests/queries/0_stateless/03221_insert_timeout_overflow_mode.sh b/tests/queries/0_stateless/03221_insert_timeout_overflow_mode.sh new file mode 100755 index 00000000000..db943a665cb --- /dev/null +++ b/tests/queries/0_stateless/03221_insert_timeout_overflow_mode.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --query "create table null_t (number UInt64) engine = Null;" +${CLICKHOUSE_CLIENT} --query "select sleep(0.1) from system.numbers settings max_block_size = 1 format Native" 2>/dev/null | ${CLICKHOUSE_CLIENT} --max_execution_time 0.3 --timeout_overflow_mode break --query "insert into null_t format Native" 2>&1 | grep -o "QUERY_WAS_CANCELLED" diff --git a/tests/queries/0_stateless/03221_key_condition_bug.reference b/tests/queries/0_stateless/03221_key_condition_bug.reference new file mode 100644 index 00000000000..e373ee695f6 --- /dev/null +++ b/tests/queries/0_stateless/03221_key_condition_bug.reference @@ -0,0 +1 @@ +50 diff --git a/tests/queries/0_stateless/03221_key_condition_bug.sql b/tests/queries/0_stateless/03221_key_condition_bug.sql new file mode 100644 index 00000000000..bac3e631a81 --- /dev/null +++ b/tests/queries/0_stateless/03221_key_condition_bug.sql @@ -0,0 +1,11 @@ +CREATE TABLE IF NOT EXISTS report_metrics_v2 +( + `a` UInt64 +) Engine = MergeTree() +ORDER BY a; + +insert into report_metrics_v2 SELECT * FROM system.numbers LIMIT 50000; + +SELECT count(*) from report_metrics_v2 WHERE (intDiv(a, 50) = 200) AND (intDiv(a, 50000) = 0); + +DROP TABLE report_metrics_v2; diff --git a/tests/queries/0_stateless/03221_merge_profile_events.reference b/tests/queries/0_stateless/03221_merge_profile_events.reference new file mode 100644 index 00000000000..d969717336b --- /dev/null +++ b/tests/queries/0_stateless/03221_merge_profile_events.reference @@ -0,0 +1,3 @@ +Horizontal 1 20000 3 0 480000 1 1 1 1 +Vertical 1 20000 1 2 480000 1 1 1 1 1 1 +Vertical 2 400000 2 6 12800000 1 1 1 1 1 1 1 1 1 1 diff --git a/tests/queries/0_stateless/03221_merge_profile_events.sql b/tests/queries/0_stateless/03221_merge_profile_events.sql new file mode 100644 index 00000000000..1aa3dd266f8 --- /dev/null +++ b/tests/queries/0_stateless/03221_merge_profile_events.sql @@ -0,0 +1,90 @@ +-- Tags: no-random-settings, no-random-merge-tree-settings + +DROP TABLE IF EXISTS t_merge_profile_events_1; + +CREATE TABLE t_merge_profile_events_1 (id UInt64, v1 UInt64, v2 UInt64) +ENGINE = MergeTree ORDER BY id +SETTINGS min_bytes_for_wide_part = 0; + +INSERT INTO t_merge_profile_events_1 SELECT number, number, number FROM numbers(10000); +INSERT INTO t_merge_profile_events_1 SELECT number, number, number FROM numbers(10000); + +OPTIMIZE TABLE t_merge_profile_events_1 FINAL; +SYSTEM FLUSH LOGS; + +SELECT + merge_algorithm, + ProfileEvents['Merge'], + ProfileEvents['MergedRows'], + ProfileEvents['MergedColumns'], + ProfileEvents['GatheredColumns'], + ProfileEvents['MergedUncompressedBytes'], + ProfileEvents['MergeTotalMilliseconds'] > 0, + ProfileEvents['MergeExecuteMilliseconds'] > 0, + ProfileEvents['MergeHorizontalStageTotalMilliseconds'] > 0, + ProfileEvents['MergeHorizontalStageExecuteMilliseconds'] > 0 +FROM system.part_log WHERE database = currentDatabase() AND table = 't_merge_profile_events_1' AND event_type = 'MergeParts' AND part_name = 'all_1_2_1'; + +DROP TABLE IF EXISTS t_merge_profile_events_1; + +DROP TABLE IF EXISTS t_merge_profile_events_2; + +CREATE TABLE t_merge_profile_events_2 (id UInt64, v1 UInt64, v2 UInt64) +ENGINE = MergeTree ORDER BY id +SETTINGS min_bytes_for_wide_part = 0, vertical_merge_algorithm_min_rows_to_activate = 1, vertical_merge_algorithm_min_columns_to_activate = 1; + +INSERT INTO t_merge_profile_events_2 SELECT number, number, number FROM numbers(10000); +INSERT INTO t_merge_profile_events_2 SELECT number, number, number FROM numbers(10000); + +OPTIMIZE TABLE t_merge_profile_events_2 FINAL; +SYSTEM FLUSH LOGS; + +SELECT + merge_algorithm, + ProfileEvents['Merge'], + ProfileEvents['MergedRows'], + ProfileEvents['MergedColumns'], + ProfileEvents['GatheredColumns'], + ProfileEvents['MergedUncompressedBytes'], + ProfileEvents['MergeTotalMilliseconds'] > 0, + ProfileEvents['MergeExecuteMilliseconds'] > 0, + ProfileEvents['MergeHorizontalStageTotalMilliseconds'] > 0, + ProfileEvents['MergeHorizontalStageExecuteMilliseconds'] > 0, + ProfileEvents['MergeVerticalStageTotalMilliseconds'] > 0, + ProfileEvents['MergeVerticalStageExecuteMilliseconds'] > 0, +FROM system.part_log WHERE database = currentDatabase() AND table = 't_merge_profile_events_2' AND event_type = 'MergeParts' AND part_name = 'all_1_2_1'; + +DROP TABLE IF EXISTS t_merge_profile_events_2; + +DROP TABLE IF EXISTS t_merge_profile_events_3; + +CREATE TABLE t_merge_profile_events_3 (id UInt64, v1 UInt64, v2 UInt64, PROJECTION p (SELECT v2, v2 * v2, v2 * 2, v2 * 10, v1 ORDER BY v1)) +ENGINE = MergeTree ORDER BY id +SETTINGS min_bytes_for_wide_part = 0, vertical_merge_algorithm_min_rows_to_activate = 1, vertical_merge_algorithm_min_columns_to_activate = 1; + +INSERT INTO t_merge_profile_events_3 SELECT number, number, number FROM numbers(100000); +INSERT INTO t_merge_profile_events_3 SELECT number, number, number FROM numbers(100000); + +OPTIMIZE TABLE t_merge_profile_events_3 FINAL; +SYSTEM FLUSH LOGS; + +SELECT + merge_algorithm, + ProfileEvents['Merge'], + ProfileEvents['MergedRows'], + ProfileEvents['MergedColumns'], + ProfileEvents['GatheredColumns'], + ProfileEvents['MergedUncompressedBytes'], + ProfileEvents['MergeTotalMilliseconds'] > 0, + ProfileEvents['MergeExecuteMilliseconds'] > 0, + ProfileEvents['MergeHorizontalStageTotalMilliseconds'] > 0, + ProfileEvents['MergeHorizontalStageExecuteMilliseconds'] > 0, + ProfileEvents['MergeVerticalStageTotalMilliseconds'] > 0, + ProfileEvents['MergeVerticalStageExecuteMilliseconds'] > 0, + ProfileEvents['MergeProjectionStageTotalMilliseconds'] > 0, + ProfileEvents['MergeProjectionStageExecuteMilliseconds'] > 0, + ProfileEvents['MergeExecuteMilliseconds'] <= duration_ms, + ProfileEvents['MergeTotalMilliseconds'] <= duration_ms +FROM system.part_log WHERE database = currentDatabase() AND table = 't_merge_profile_events_3' AND event_type = 'MergeParts' AND part_name = 'all_1_2_1'; + +DROP TABLE IF EXISTS t_merge_profile_events_3; diff --git a/tests/queries/0_stateless/03221_mutate_profile_events.reference b/tests/queries/0_stateless/03221_mutate_profile_events.reference new file mode 100644 index 00000000000..d094c37ff88 --- /dev/null +++ b/tests/queries/0_stateless/03221_mutate_profile_events.reference @@ -0,0 +1,2 @@ +3 2 1 10000 160000 0 1 1 1 +4 2 1 10000 320000 1 0 1 1 diff --git a/tests/queries/0_stateless/03221_mutate_profile_events.sh b/tests/queries/0_stateless/03221_mutate_profile_events.sh new file mode 100755 index 00000000000..3758db905e0 --- /dev/null +++ b/tests/queries/0_stateless/03221_mutate_profile_events.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +# Tags: no-random-settings, no-random-merge-tree-settings + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --query " + DROP TABLE IF EXISTS t_mutate_profile_events; + + CREATE TABLE t_mutate_profile_events (key UInt64, id UInt64, v1 UInt64, v2 UInt64) + ENGINE = MergeTree ORDER BY id PARTITION BY key + SETTINGS min_bytes_for_wide_part = 0; + + INSERT INTO t_mutate_profile_events SELECT 1, number, number, number FROM numbers(10000); + INSERT INTO t_mutate_profile_events SELECT 2, number, number, number FROM numbers(10000); + + SET mutations_sync = 2; + + ALTER TABLE t_mutate_profile_events UPDATE v1 = 1000 WHERE key = 1; + ALTER TABLE t_mutate_profile_events DELETE WHERE key = 2 AND v2 % 10 = 0; +" + +# Mutation query may return before the entry is added to part log. +# So, we may have to retry the flush of logs until all entries are actually flushed. +for _ in {1..10}; do + ${CLICKHOUSE_CLIENT} --query "SYSTEM FLUSH LOGS" + res=$(${CLICKHOUSE_CLIENT} --query "SELECT count() FROM system.part_log WHERE database = currentDatabase() AND table = 't_mutate_profile_events' AND event_type = 'MutatePart'") + + if [[ $res -eq 4 ]]; then + break + fi + + sleep 2.0 +done + +${CLICKHOUSE_CLIENT} --query " + SELECT + splitByChar('_', part_name)[-1] AS version, + sum(ProfileEvents['MutationTotalParts']), + sum(ProfileEvents['MutationUntouchedParts']), + sum(ProfileEvents['MutatedRows']), + sum(ProfileEvents['MutatedUncompressedBytes']), + sum(ProfileEvents['MutationAllPartColumns']), + sum(ProfileEvents['MutationSomePartColumns']), + sum(ProfileEvents['MutationTotalMilliseconds']) > 0, + sum(ProfileEvents['MutationExecuteMilliseconds']) > 0, + FROM system.part_log + WHERE database = currentDatabase() AND table = 't_mutate_profile_events' AND event_type = 'MutatePart' + GROUP BY version ORDER BY version; + + DROP TABLE IF EXISTS t_mutate_profile_events; +" diff --git a/tests/queries/0_stateless/03221_mutation_analyzer_skip_part.reference b/tests/queries/0_stateless/03221_mutation_analyzer_skip_part.reference new file mode 100644 index 00000000000..68f8708dbaf --- /dev/null +++ b/tests/queries/0_stateless/03221_mutation_analyzer_skip_part.reference @@ -0,0 +1,4 @@ +1_1_1_0_3 10000 +1_1_1_0_4 0 +2_2_2_0_3 0 +2_2_2_0_4 10000 diff --git a/tests/queries/0_stateless/03221_mutation_analyzer_skip_part.sh b/tests/queries/0_stateless/03221_mutation_analyzer_skip_part.sh new file mode 100755 index 00000000000..03fd15f54e2 --- /dev/null +++ b/tests/queries/0_stateless/03221_mutation_analyzer_skip_part.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash +# Tags: no-random-settings, no-random-merge-tree-settings + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --query " + DROP TABLE IF EXISTS t_mutate_skip_part; + + CREATE TABLE t_mutate_skip_part (key UInt64, id UInt64, v1 UInt64, v2 UInt64) + ENGINE = MergeTree ORDER BY id PARTITION BY key + SETTINGS min_bytes_for_wide_part = 0; + + INSERT INTO t_mutate_skip_part SELECT 1, number, number, number FROM numbers(10000); + INSERT INTO t_mutate_skip_part SELECT 2, number, number, number FROM numbers(10000); + + SET mutations_sync = 2; + ALTER TABLE t_mutate_skip_part UPDATE v1 = 1000 WHERE key = 1; + ALTER TABLE t_mutate_skip_part DELETE WHERE key = 2 AND v2 % 10 = 0; +" + +# Mutation query may return before the entry is added to part log. +# So, we may have to retry the flush of logs until all entries are actually flushed. +for _ in {1..10}; do + ${CLICKHOUSE_CLIENT} --query "SYSTEM FLUSH LOGS" + res=$(${CLICKHOUSE_CLIENT} --query "SELECT count() FROM system.part_log WHERE database = currentDatabase() AND table = 't_mutate_skip_part' AND event_type = 'MutatePart'") + + if [[ $res -eq 4 ]]; then + break + fi + + sleep 2.0 +done + +${CLICKHOUSE_CLIENT} --query " + SYSTEM FLUSH LOGS; + + -- If part is skipped in mutation and hardlinked then read_rows must be 0. + SELECT part_name, read_rows + FROM system.part_log + WHERE database = currentDatabase() AND table = 't_mutate_skip_part' AND event_type = 'MutatePart' + ORDER BY part_name; + + DROP TABLE IF EXISTS t_mutate_skip_part; +" diff --git a/tests/queries/0_stateless/03221_refreshable_matview_progress.reference b/tests/queries/0_stateless/03221_refreshable_matview_progress.reference new file mode 100644 index 00000000000..5ed392e61c7 --- /dev/null +++ b/tests/queries/0_stateless/03221_refreshable_matview_progress.reference @@ -0,0 +1,2 @@ +0 +4 4 1 diff --git a/tests/queries/0_stateless/03221_refreshable_matview_progress.sql b/tests/queries/0_stateless/03221_refreshable_matview_progress.sql new file mode 100644 index 00000000000..98e1c48478d --- /dev/null +++ b/tests/queries/0_stateless/03221_refreshable_matview_progress.sql @@ -0,0 +1,20 @@ +-- Tags: no-replicated-database, no-ordinary-database + +set allow_experimental_refreshable_materialized_view=1; + +CREATE MATERIALIZED VIEW 03221_rmv +REFRESH AFTER 10 SECOND +( +x UInt64 +) +ENGINE = Memory +AS SELECT number AS x +FROM numbers(3) +UNION ALL +SELECT rand64() AS x; + +SELECT sleep(2); + +SELECT read_rows, total_rows, progress FROM system.view_refreshes WHERE database = currentDatabase() and view = '03221_rmv'; + +DROP TABLE 03221_rmv; diff --git a/tests/queries/0_stateless/03221_s3_imds_decent_timeout.reference b/tests/queries/0_stateless/03221_s3_imds_decent_timeout.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/03221_s3_imds_decent_timeout.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/03221_s3_imds_decent_timeout.sh b/tests/queries/0_stateless/03221_s3_imds_decent_timeout.sh new file mode 100755 index 00000000000..021278955cd --- /dev/null +++ b/tests/queries/0_stateless/03221_s3_imds_decent_timeout.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-asan, no-msan, no-tsan +# ^ requires S3 + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# Inaccessible IMDS should not introduce large delays, so this query should reply quickly at least sometimes: +while true +do + # This host (likely) drops packets sent to it (does not reply), so it is good for testing timeouts. + # At the same time, we expect that the clickhouse host does not drop packets and quickly replies with 4xx, which is a non-retriable error for S3. + AWS_EC2_METADATA_SERVICE_ENDPOINT='https://10.255.255.255/' ${CLICKHOUSE_LOCAL} --time --query "SELECT * FROM s3('${CLICKHOUSE_PORT_HTTP_PROTO}://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTP}/nonexistent')" |& grep -v -F 404 | + ${CLICKHOUSE_LOCAL} --input-format TSV "SELECT c1::Float64 < 1 FROM table" | grep 1 && break +done diff --git a/tests/queries/0_stateless/03222_create_timeseries_table.reference b/tests/queries/0_stateless/03222_create_timeseries_table.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03222_create_timeseries_table.sql b/tests/queries/0_stateless/03222_create_timeseries_table.sql new file mode 100644 index 00000000000..bdb29e7d366 --- /dev/null +++ b/tests/queries/0_stateless/03222_create_timeseries_table.sql @@ -0,0 +1,7 @@ +SET allow_experimental_time_series_table = 1; + +CREATE TABLE 03222_timeseries_table1 ENGINE = TimeSeries FORMAT Null; +CREATE TABLE 03222_timeseries_table2 ENGINE = TimeSeries SETTINGS store_min_time_and_max_time = 1, aggregate_min_time_and_max_time = 1 FORMAT Null; +--- This doesn't work because allow_nullable_key cannot be set in query for the internal MergeTree tables +--- CREATE TABLE 03222_timeseries_table3 ENGINE = TimeSeries SETTINGS store_min_time_and_max_time = 1, aggregate_min_time_and_max_time = 0; +CREATE TABLE 03222_timeseries_table4 ENGINE = TimeSeries SETTINGS store_min_time_and_max_time = 0 FORMAT Null; diff --git a/tests/queries/0_stateless/03222_date_time_inference.reference b/tests/queries/0_stateless/03222_date_time_inference.reference new file mode 100644 index 00000000000..221ab1fe5f5 --- /dev/null +++ b/tests/queries/0_stateless/03222_date_time_inference.reference @@ -0,0 +1,273 @@ +Date +2020-01-01 Date +2020-01-01 Date +2020-01-01 Date +2020-01-01 Date +2020-01-01 Date +2020-01-01 Date +2020-01-01 Date +2020-01-01 Date +2020-01-01 Date +2020-01-01 Date +2020-01-01 Date +2020-01-01 Date +String +2020_01_01 String +2020_1_01 String +2020_01_1 String +2020_1_1 String +2020a01a01 String +2020a1a01 String +2020a01a1 String +2020a1a1 String +20200101 String +DateTime +2020-01-02 18:42:42 DateTime +2020-01-02 18:42:42 DateTime +2020-01-02 18:42:42 DateTime +String +2020_01_01 42:42:42 String +2020a01a01 42:42:42 String +2020-01-01 42.42.42 String +2020-01-01 42 42 42 String +2020-01-01 42a42a42 String +DateTime64 +2020-01-02 18:42:42.424200000 DateTime64(9) +2020-01-02 18:42:42.424200000 DateTime64(9) +2020-01-02 18:42:42.424200000 DateTime64(9) +String +2020_01_01 42:42:42.4242 String +2020a01a01 42:42:42.4242 String +2020-01-01 42.42.42.4242 String +2020-01-01 42 42 42.4242 String +2020-01-01 42a42a42.4242 String +DateTime/DateTime64 best effort +2000-01-01 00:00:00 DateTime +2000-01-01 01:00:00 DateTime +2000-01-01 01:00:00.000000000 DateTime64(9) +02/01/17 010203 MSK String +02/01/17 010203.000 MSK String +02/01/17 010203 MSK+0100 String +02/01/17 010203.000 MSK+0100 String +02/01/17 010203 UTC+0300 String +02/01/17 010203.000 UTC+0300 String +02/01/17 010203Z String +02/01/17 010203.000Z String +02/01/1970 010203Z String +02/01/1970 010203.000Z String +02/01/70 010203Z String +02/01/70 010203.000Z String +2018-02-11 03:40:50 DateTime +2018-02-11 03:40:50.000000000 DateTime64(9) +2000-04-17 01:02:03 DateTime +2000-04-17 01:02:03.000000000 DateTime64(9) +19700102 01:00:00 String +19700102 01:00:00.000 String +19700102010203Z String +19700102010203Z.000 String +1970/01/02 010203Z String +1970/01/02 010203.000Z String +2015-12-31 20:00:00 DateTime +2015-12-31 20:00:00 DateTime +2016-01-01 00:00:00 DateTime +2016-01-01 00:00:00 DateTime +201701 02 010203 UTC+0300 String +201701 02 010203.000 UTC+0300 String +2017-01-02 03:04:05 DateTime +2017-01-02 03:04:05.000000000 DateTime64(9) +2017-01-02 03:04:05 DateTime +2017-01-02 03:04:05.000000000 DateTime64(9) +2017-01-02 03:04:05 DateTime +2017-01-02 03:04:05.000000000 DateTime64(9) +2017-01-02 03:04:05 DateTime +2017-01-02 03:04:05.000000000 DateTime64(9) +2017-01-02 04:04:05 DateTime +2017-01-02 04:04:05.000000000 DateTime64(9) +2017-01-02 02:34:05 DateTime +2017-01-02 02:34:05.000000000 DateTime64(9) +2017-01-02 00:04:05 DateTime +2017-01-02 00:04:05.000000000 DateTime64(9) +2017-01-02 02:04:05 DateTime +2017-01-02 02:04:05.000000000 DateTime64(9) +2017-01-02 00:04:05 DateTime +2017-01-02 00:04:05.000000000 DateTime64(9) +2017-01-01 18:04:05 DateTime +2017-01-01 18:04:05.000000000 DateTime64(9) +2017-01-02 03:04:05 DateTime +2017-01-02 03:04:05.000000000 DateTime64(9) +2017-01-01 23:04:05 DateTime +2017-01-01 23:04:05.000000000 DateTime64(9) +2017-02-01 23:04:05 DateTime +2017-02-01 23:04:05.000000000 DateTime64(9) +2017-06-01 23:04:05 DateTime +2017-06-01 23:04:05.000000000 DateTime64(9) +2017-01-02 00:04:05 DateTime +2017-01-02 00:04:05.000000000 DateTime64(9) +2017-01-02 03:04:05 DateTime +2017-01-02 03:04:05.000000000 DateTime64(9) +2017-01-02 03:04:05 DateTime +2017-01-02 03:04:05.000000000 DateTime64(9) +2017-01-02 04:04:05 DateTime +2017-01-02 04:04:05.000000000 DateTime64(9) +2017-01-02 04:04:05 DateTime +2017-01-02 04:04:05.000000000 DateTime64(9) +2017-01-02 02:04:05 DateTime +2017-01-02 02:04:05.000000000 DateTime64(9) +2017-01-02 03:04:05 DateTime +2017-01-02 03:04:05.000000000 DateTime64(9) +2017-04-01 11:22:33 DateTime +2017-04-01 11:22:33.000000000 DateTime64(9) +2017 Apr 02 010203 UTC+0300 String +2017 Apr 02 010203.000 UTC+0300 String +2017-04-01 22:02:03 DateTime +2017-04-01 22:02:03.000000000 DateTime64(9) +2017-04-02 01:02:03 DateTime +2017-04-02 01:02:03.000000000 DateTime64(9) +2017-04-02 11:22:33 DateTime +2017-04-02 11:22:33.000000000 DateTime64(9) +2017-04-02 01:02:03 DateTime +2017-04-02 01:02:03.000000000 DateTime64(9) +2017-04-02 01:22:33 DateTime +2017-04-02 01:22:33.000000000 DateTime64(9) +2017-04-02 01:02:03 DateTime +2017-04-02 01:02:03.000000000 DateTime64(9) +2017-04-02 01:02:33 DateTime +2017-04-02 01:02:33.000000000 DateTime64(9) +2017-04-01 22:02:03 DateTime +2017-04-01 22:02:03.000000000 DateTime64(9) +2017-04-02 01:02:03 DateTime +2017-04-02 01:02:03.000000000 DateTime64(9) +2017-04-01 22:02:03 DateTime +2017-04-01 22:02:03.000000000 DateTime64(9) +2017-04-01 21:02:03 DateTime +2017-04-01 21:02:03.000000000 DateTime64(9) +2017-04-02 01:02:03 DateTime +2017-04-02 01:02:03.000000000 DateTime64(9) +2017 Jan 02 010203 UTC+0300 String +2017 Jan 02 010203.000 UTC+0300 String +2017-04-25 01:02:03 DateTime +2017-04-25 01:02:03.000000000 DateTime64(9) +2017-04-25 01:02:03 DateTime +2017-04-25 01:02:03.000000000 DateTime64(9) +2017-01-25 01:02:03 DateTime +2017-01-25 01:02:03.000000000 DateTime64(9) +2017-01-24 22:02:03 DateTime +2017-01-24 22:02:03.000000000 DateTime64(9) +2017-01-25 13:02:03 DateTime +2017-01-25 13:02:03.000000000 DateTime64(9) +2017-01-25 01:02:03 DateTime +2017-01-25 01:02:03.000000000 DateTime64(9) +2017-01-25 01:02:03 DateTime +2017-01-25 01:02:03.000000000 DateTime64(9) +2017-01-24 22:02:03 DateTime +2017-01-24 22:02:03.000000000 DateTime64(9) +2017-01-24 22:02:03 DateTime +2017-01-24 22:02:03.000000000 DateTime64(9) +2017-01-25 10:02:03 DateTime +2017-01-25 10:02:03.000000000 DateTime64(9) +2017-01-25 10:02:03 DateTime +2017-01-25 10:02:03.000000000 DateTime64(9) +2017-01-25 10:02:03 DateTime +2017-01-25 10:02:03.000000000 DateTime64(9) +2017-01-25 09:32:03 DateTime +2017-01-25 09:32:03.000000000 DateTime64(9) +2017-01-25 01:02:03 DateTime +2017-01-25 01:02:03.000000000 DateTime64(9) +2017-01-25 13:02:03 DateTime +2017-01-25 13:02:03.000000000 DateTime64(9) +2017-01-25 13:02:03 DateTime +2017-01-25 13:02:03.000000000 DateTime64(9) +2017-01-25 10:02:03 DateTime +2017-01-25 10:02:03.000000000 DateTime64(9) +2018-02-11 03:40:50 DateTime +2018-02-11 03:40:50.000000000 DateTime64(9) +2018-02-11 03:40:50 DateTime +2018-02-11 03:40:50.000000000 DateTime64(9) +String +2 String +20 String +200 String +2000 String +20000 String +200001 String +2000010 String +20000101 String +200001010 String +2000010101 String +20000101010 String +200001010101 String +2000010101010 String +20000101010101 String +2.1 String +20.1 String +200.1 String +2000.1 String +20000.1 String +200001.1 String +2000010.1 String +20000101.1 String +200001010.1 String +2000010101.1 String +20000101010.1 String +200001010101.1 String +2000010101010.1 String +20000101010101.1 String +Mar String +Mar1 String +Mar 1 String +Mar01 String +Mar 01 String +Mar2020 String +Mar 2020 String +Mar012020 String +Mar 012020 String +Mar01012020 String +Mar 01012020 String +Mar0101202001 String +Mar 0101202001 String +Mar010120200101 String +Mar 010120200101 String +Mar01012020010101 String +Mar 01012020010101 String +Mar01012020010101.000 String +Mar 0101202001010101.000 String +Sun String +Sun1 String +Sun 1 String +Sun01 String +Sun 01 String +Sun2020 String +Sun 2020 String +Sun012020 String +Sun 012020 String +Sun01012020 String +Sun 01012020 String +Sun0101202001 String +Sun 0101202001 String +Sun010120200101 String +Sun 010120200101 String +Sun01012020010101 String +Sun 01012020010101 String +Sun01012020010101.000 String +Sun 0101202001010101.000 String +2000 01 01 01:00:00 String +2000 01 01 01:00:00.000 String +2000a01a01 01:00:00 String +2000a01a01 01:00:00.000 String +2000-01-01 01 00 00 String +2000-01-01 01 00 00.000 String +2000-01-01 01-00-00 String +2000-01-01 01-00-00.000 String +2000-01-01 01a00a00 String +2000-01-01 01a00a00.000 String +2000-01 01:00:00 String +2000-01 01:00:00.000 String +2000 01 String +2000-01 String +Mar 2000 00:00:00 String +Mar 2000 00:00:00.000 String +2000 00:00:00 String +2000 00:00:00.000 String +Mar 2000-01-01 00:00:00 String +Mar 2000-01-01 00:00:00.000 String +1.7.10 String diff --git a/tests/queries/0_stateless/03222_date_time_inference.sql b/tests/queries/0_stateless/03222_date_time_inference.sql new file mode 100644 index 00000000000..b16f72c72f4 --- /dev/null +++ b/tests/queries/0_stateless/03222_date_time_inference.sql @@ -0,0 +1,288 @@ +set input_format_try_infer_datetimes = 1; +set input_format_try_infer_dates = 1; +set schema_inference_make_columns_nullable = 0; +set input_format_json_try_infer_numbers_from_strings = 0; +set session_timezone = 'UTC'; + +select 'Date'; +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020:01:01"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020:1:01"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020:01:1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020:1:1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-01-01"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-1-01"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-01-1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-1-1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020/01/01"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020/1/01"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020/01/1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020/1/1"}'); + +select 'String'; +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020_01_01"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020_1_01"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020_01_1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020_1_1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020a01a01"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020a1a01"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020a01a1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020a1a1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "20200101"}'); + +select 'DateTime'; +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020:01:01 42:42:42"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020/01/01 42:42:42"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-01-01 42:42:42"}'); + +select 'String'; +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020_01_01 42:42:42"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020a01a01 42:42:42"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-01-01 42.42.42"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-01-01 42 42 42"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-01-01 42a42a42"}'); + +select 'DateTime64'; +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020:01:01 42:42:42.4242"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020/01/01 42:42:42.4242"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-01-01 42:42:42.4242"}'); + +select 'String'; +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020_01_01 42:42:42.4242"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020a01a01 42:42:42.4242"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-01-01 42.42.42.4242"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-01-01 42 42 42.4242"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-01-01 42a42a42.4242"}'); + +set date_time_input_format='best_effort'; +select 'DateTime/DateTime64 best effort'; +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01-01 00:00:00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01-01 01:00:00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01-01 01:00:00.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/17 010203 MSK"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/17 010203.000 MSK"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/17 010203 MSK+0100"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/17 010203.000 MSK+0100"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/17 010203 UTC+0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/17 010203.000 UTC+0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/17 010203Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/17 010203.000Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/1970 010203Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/1970 010203.000Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/70 010203Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/70 010203.000Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "11 Feb 2018 06:40:50 +0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "11 Feb 2018 06:40:50.000 +0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "17 Apr 2000 2 1:2:3"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "17 Apr 2000 2 1:2:3.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "19700102 01:00:00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "19700102 01:00:00.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "19700102010203Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "19700102010203Z.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "1970/01/02 010203Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "1970/01/02 010203.000Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2016-01-01MSD"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2016-01-01 MSD"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2016-01-01UTC"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2016-01-01Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "201701 02 010203 UTC+0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "201701 02 010203.000 UTC+0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05+0"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000+0"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05+00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000+00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05+0000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000+0000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05 -0100"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000 -0100"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05+030"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000+030"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05+0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000+0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05+1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000+1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05+300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000+300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05+900"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000+900"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05GMT"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000GMT"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05 MSD"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000 MSD"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05 MSD Feb"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000 MSD Feb"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05 MSD Jun"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000 MSD Jun"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05 MSK"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000 MSK"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05+00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05.000+00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05 -0100"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05.000 -0100"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05-0100"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05.000-0100"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05+0100"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05.000+0100"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05.000Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 01 11:22:33"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 01 11:22:33.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 010203 UTC+0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 010203.000 UTC+0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 01:2:3 UTC+0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 01:2:3.000 UTC+0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:02:3"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:02:3.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 11:22:33"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 11:22:33.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:03"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:03.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:22:33"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:22:33.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:3"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:3.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:33"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:33.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:3 MSK"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:3.000 MSK"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:3 UTC+0000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:3.000 UTC+0000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:3 UTC+0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:3.000 UTC+0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:3 UTC+0400"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:3.000 UTC+0400"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 2 1:2:3"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 2 1:2:3.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Jan 02 010203 UTC+0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Jan 02 010203.000 UTC+0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Apr 2017 01:02:03"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Apr 2017 01:02:03.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Apr 2017 1:2:3"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Apr 2017 1:2:3.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3 MSK"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000 MSK"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3 PM"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000 PM"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3 Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000 Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3 Z +0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000 Z +0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3 Z+03:00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000 Z+03:00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3 Z +03:00 PM"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000 Z +03:00 PM"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3 Z +0300 PM"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000 Z +0300 PM"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3 Z+03:00 PM"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000 Z+03:00 PM"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3 Z +03:30 PM"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000 Z +03:30 PM"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3Z Mon"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000Z Mon"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3 Z PM"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000 Z PM"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3Z PM"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000Z PM"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3 Z PM +03:00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000 Z PM +03:00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Sun 11 Feb 2018 06:40:50 +0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Sun 11 Feb 2018 06:40:50.000 +0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Sun, 11 Feb 2018 06:40:50 +0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Sun, 11 Feb 2018 06:40:50.000 +0300"}'); + +select 'String'; +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "20"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "200"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "20000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "200001"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000010"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "20000101"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "200001010"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000010101"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "20000101010"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "200001010101"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000010101010"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "20000101010101"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2.1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "20.1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "200.1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000.1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "20000.1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "200001.1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000010.1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "20000101.1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "200001010.1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000010101.1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "20000101010.1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "200001010101.1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000010101010.1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "20000101010101.1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar01"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 01"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar2020"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 2020"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar012020"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 012020"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar01012020"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 01012020"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar0101202001"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 0101202001"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar010120200101"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 010120200101"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar01012020010101"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 01012020010101"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar01012020010101.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 0101202001010101.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Sun"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Sun1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Sun 1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Sun01"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Sun 01"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Sun2020"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Sun 2020"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Sun012020"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Sun 012020"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Sun01012020"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Sun 01012020"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Sun0101202001"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Sun 0101202001"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Sun010120200101"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Sun 010120200101"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Sun01012020010101"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Sun 01012020010101"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Sun01012020010101.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Sun 0101202001010101.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000 01 01 01:00:00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000 01 01 01:00:00.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000a01a01 01:00:00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000a01a01 01:00:00.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01-01 01 00 00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01-01 01 00 00.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01-01 01-00-00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01-01 01-00-00.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01-01 01a00a00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01-01 01a00a00.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01 01:00:00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01 01:00:00.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000 01"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 2000 00:00:00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 2000 00:00:00.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000 00:00:00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000 00:00:00.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 2000-01-01 00:00:00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 2000-01-01 00:00:00.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "1.7.10"}'); + diff --git a/tests/queries/0_stateless/03222_datetime64_small_value_const.reference b/tests/queries/0_stateless/03222_datetime64_small_value_const.reference new file mode 100644 index 00000000000..ae36c08acc5 --- /dev/null +++ b/tests/queries/0_stateless/03222_datetime64_small_value_const.reference @@ -0,0 +1,18 @@ +0 1970-01-01 00:00:00.000 +0 1970-01-01 00:00:05.000 +0 1970-01-01 00:45:25.456789 +0 1970-01-01 00:53:25.456789123 +0 \N +1 1970-01-01 00:00:00.000 +5 1970-01-01 00:00:00.000 +2 1970-01-01 00:00:02.456 +3 1970-01-01 00:00:04.811 +4 1970-01-01 00:10:05.000 +4 1970-01-01 00:10:05.000 +1 1970-01-01 00:00:00.000 +2 1970-01-01 00:00:02.456 +3 1970-01-01 00:00:04.811 +5 1970-01-01 00:00:00.000 +0 +0 +5 diff --git a/tests/queries/0_stateless/03222_datetime64_small_value_const.sql b/tests/queries/0_stateless/03222_datetime64_small_value_const.sql new file mode 100644 index 00000000000..a64ebd38ccf --- /dev/null +++ b/tests/queries/0_stateless/03222_datetime64_small_value_const.sql @@ -0,0 +1,44 @@ +-- Tags: shard +set session_timezone = 'UTC'; -- don't randomize the session timezone +SET allow_experimental_analyzer = 1; + +select *, (select toDateTime64(0, 3)) from remote('127.0.0.1', system.one) settings prefer_localhost_replica=0; +select *, (select toDateTime64(5, 3)) from remote('127.0.0.1', system.one) settings prefer_localhost_replica=0; +select *, (select toDateTime64('1970-01-01 00:45:25.456789', 6)) from remote('127.0.0.1', system.one) settings prefer_localhost_replica=0; +select *, (select toDateTime64('1970-01-01 00:53:25.456789123', 9)) from remote('127.0.0.1', system.one) settings prefer_localhost_replica=0; +select *, (select toDateTime64(null,3)) from remote('127.0.0.1', system.one) settings prefer_localhost_replica=0; + +create database if not exists shard_0; +create database if not exists shard_1; + +drop table if exists shard_0.dt64_03222; +drop table if exists shard_1.dt64_03222; +drop table if exists distr_03222_dt64; + +create table shard_0.dt64_03222(id UInt64, dt DateTime64(3)) engine = MergeTree order by id; +create table shard_1.dt64_03222(id UInt64, dt DateTime64(3)) engine = MergeTree order by id; +create table distr_03222_dt64 (id UInt64, dt DateTime64(3)) engine = Distributed(test_cluster_two_shards_different_databases, '', dt64_03222); + +insert into shard_0.dt64_03222 values(1, toDateTime64('1970-01-01 00:00:00.000',3)); +insert into shard_0.dt64_03222 values(2, toDateTime64('1970-01-01 00:00:02.456',3)); +insert into shard_1.dt64_03222 values(3, toDateTime64('1970-01-01 00:00:04.811',3)); +insert into shard_1.dt64_03222 values(4, toDateTime64('1970-01-01 00:10:05',3)); +insert into shard_1.dt64_03222 values(5, toDateTime64(0,3)); + +--Output : 1,5 2,3,4 4 1,2,3,5 0 0 5 +select id, dt from distr_03222_dt64 where dt = (select toDateTime64(0,3)) order by id; +select id, dt from distr_03222_dt64 where dt > (select toDateTime64(0,3)) order by id; +select id, dt from distr_03222_dt64 where dt > (select toDateTime64('1970-01-01 00:10:00.000',3)) order by id; +select id, dt from distr_03222_dt64 where dt < (select toDateTime64(5,3)) order by id; + +select count(*) from distr_03222_dt64 where dt > (select toDateTime64('2024-07-20 00:00:00',3)); +select count(*) from distr_03222_dt64 where dt > (select now()); +select count(*) from distr_03222_dt64 where dt < (select toDateTime64('2004-07-20 00:00:00',3)); + + +drop table if exists shard_0.dt64_03222; +drop table if exists shard_1.dt64_03222; +drop table if exists distr_03222_dt64; + +drop database shard_0; +drop database shard_1; diff --git a/tests/queries/0_stateless/03222_json_squashing.reference b/tests/queries/0_stateless/03222_json_squashing.reference new file mode 100644 index 00000000000..d0c19d8239a --- /dev/null +++ b/tests/queries/0_stateless/03222_json_squashing.reference @@ -0,0 +1,102 @@ +All paths +a +b +c +d +Dynamic paths +a +b +c +d +Shared data paths +All paths +a +b +c +d +e +f +Dynamic paths +a +b +c +d +e +f +Shared data paths +All paths +a +b +c +d +Dynamic paths +c +d +Shared data paths +a +b +All paths +a +b +c +d +e +f +Dynamic paths +a +b +Shared data paths +c +d +e +f +All paths +a +b +c +d +e +Dynamic paths +a +e +Shared data paths +b +c +d +All paths +b +c +d +e +Dynamic paths +d +e +Shared data paths +b +c +All paths +b +c +d +e +f +g +Dynamic paths +b +c +Shared data paths +d +e +f +g +All paths +b +d +e +f +Dynamic paths +b +f +Shared data paths +d +e diff --git a/tests/queries/0_stateless/03222_json_squashing.sql b/tests/queries/0_stateless/03222_json_squashing.sql new file mode 100644 index 00000000000..53090c5cb88 --- /dev/null +++ b/tests/queries/0_stateless/03222_json_squashing.sql @@ -0,0 +1,82 @@ +-- Tags: long + +set allow_experimental_json_type = 1; +set max_block_size = 1000; + +drop table if exists test; + +create table test (json JSON) engine=MergeTree order by tuple(); +insert into test select multiIf(number < 1000, '{}'::JSON, number < 3000, '{"a" : 42, "b" : "Hello"}'::JSON, '{"c" : [1, 2, 3], "d" : "2020-01-01"}'::JSON) from numbers(20000); +select 'All paths'; +select distinct arrayJoin(JSONAllPaths(json)) as path from test order by path; +select 'Dynamic paths'; +select distinct arrayJoin(JSONDynamicPaths(json)) as path from test order by path; +select 'Shared data paths'; +select distinct arrayJoin(JSONSharedDataPaths(json)) as path from test order by path; + +truncate table test; +insert into test select multiIf(number < 1000, '{"a" : 42, "b" : "Hello"}'::JSON, number < 3000, '{"c" : [1, 2, 3], "d" : "2020-01-01"}'::JSON, '{"e" : 43, "f" : ["s1", "s2", "s3"]}'::JSON) from numbers(20000); +select 'All paths'; +select distinct arrayJoin(JSONAllPaths(json)) as path from test order by path; +select 'Dynamic paths'; +select distinct arrayJoin(JSONDynamicPaths(json)) as path from test order by path; +select 'Shared data paths'; +select distinct arrayJoin(JSONSharedDataPaths(json)) as path from test order by path; + +drop table test; +create table test (json JSON(max_dynamic_paths=2)) engine=MergeTree order by tuple(); +insert into test select multiIf(number < 1000, '{}'::JSON(max_dynamic_paths=2), number < 3000, '{"a" : 42, "b" : "Hello"}'::JSON(max_dynamic_paths=2), '{"c" : [1, 2, 3], "d" : "2020-01-01"}'::JSON(max_dynamic_paths=2)) from numbers(20000); +select 'All paths'; +select distinct arrayJoin(JSONAllPaths(json)) as path from test order by path; +select 'Dynamic paths'; +select distinct arrayJoin(JSONDynamicPaths(json)) as path from test order by path; +select 'Shared data paths'; +select distinct arrayJoin(JSONSharedDataPaths(json)) as path from test order by path; + +truncate table test; +insert into test select multiIf(number < 1000, '{"a" : 42, "b" : "Hello"}'::JSON(max_dynamic_paths=2), number < 3000, '{"c" : [1, 2, 3], "d" : "2020-01-01"}'::JSON(max_dynamic_paths=2), '{"e" : 43, "f" : ["s1", "s2", "s3"]}'::JSON(max_dynamic_paths=2)) from numbers(20000); +select 'All paths'; +select distinct arrayJoin(JSONAllPaths(json)) as path from test order by path; +select 'Dynamic paths'; +select distinct arrayJoin(JSONDynamicPaths(json)) as path from test order by path; +select 'Shared data paths'; +select distinct arrayJoin(JSONSharedDataPaths(json)) as path from test order by path; + +truncate table test; +insert into test select multiIf(number < 1000, '{"a" : 42}'::JSON(max_dynamic_paths=2), number < 3000, '{"b" : "Hello", "c" : [1, 2, 3], "d" : "2020-01-01"}'::JSON(max_dynamic_paths=2), '{"e" : 43}'::JSON(max_dynamic_paths=2)) from numbers(20000); +select 'All paths'; +select distinct arrayJoin(JSONAllPaths(json)) as path from test order by path; +select 'Dynamic paths'; +select distinct arrayJoin(JSONDynamicPaths(json)) as path from test order by path; +select 'Shared data paths'; +select distinct arrayJoin(JSONSharedDataPaths(json)) as path from test order by path; + +drop table test; +create table test (json JSON(max_dynamic_paths=8)) engine=MergeTree order by tuple(); +insert into test select multiIf(number < 1000, '{}'::JSON(max_dynamic_paths=8), number < 3000, materialize('{"a" : [{"b" : 42, "c" : [1, 2, 3]}]}')::JSON(max_dynamic_paths=8), materialize('{"a" : [{"d" : "2020-01-01", "e" : "Hello"}]}')::JSON(max_dynamic_paths=8)) from numbers(20000); +select 'All paths'; +select distinct arrayJoin(JSONAllPaths(arrayJoin(json.a[]))) as path from test order by path; +select 'Dynamic paths'; +select distinct arrayJoin(JSONDynamicPaths(arrayJoin(json.a[]))) as path from test order by path; +select 'Shared data paths'; +select distinct arrayJoin(JSONSharedDataPaths(arrayJoin(json.a[]))) as path from test order by path; + +truncate table test; +insert into test select multiIf(number < 1000, materialize('{"a" : [{"b" : 42, "c" : [1, 2, 3]}]}')::JSON(max_dynamic_paths=8), number < 3000, materialize('{"a" : [{"d" : "2020-01-01", "e" : "Hello"}]}')::JSON(max_dynamic_paths=8), materialize('{"a" : [{"f" : "2020-01-01 00:00:00", "g" : "Hello2"}]}')::JSON(max_dynamic_paths=8)) from numbers(20000); +select 'All paths'; +select distinct arrayJoin(JSONAllPaths(arrayJoin(json.a[]))) as path from test order by path; +select 'Dynamic paths'; +select distinct arrayJoin(JSONDynamicPaths(arrayJoin(json.a[]))) as path from test order by path; +select 'Shared data paths'; +select distinct arrayJoin(JSONSharedDataPaths(arrayJoin(json.a[]))) as path from test order by path; + +truncate table test; +insert into test select multiIf(number < 1000, materialize('{"a" : [{"b" : 42}]}')::JSON(max_dynamic_paths=8), number < 3000, materialize('{"a" : [{"d" : "2020-01-01", "e" : "Hello"}]}')::JSON(max_dynamic_paths=8), materialize('{"a" : [{"f" : "2020-01-01 00:00:00"}]}')::JSON(max_dynamic_paths=8)) from numbers(20000); +select 'All paths'; +select distinct arrayJoin(JSONAllPaths(arrayJoin(json.a[]))) as path from test order by path; +select 'Dynamic paths'; +select distinct arrayJoin(JSONDynamicPaths(arrayJoin(json.a[]))) as path from test order by path; +select 'Shared data paths'; +select distinct arrayJoin(JSONSharedDataPaths(arrayJoin(json.a[]))) as path from test order by path; + +drop table test; \ No newline at end of file diff --git a/tests/queries/0_stateless/03222_parallel_replicas_min_marks_to_read_overflow.reference b/tests/queries/0_stateless/03222_parallel_replicas_min_marks_to_read_overflow.reference new file mode 100644 index 00000000000..b6c452ba328 --- /dev/null +++ b/tests/queries/0_stateless/03222_parallel_replicas_min_marks_to_read_overflow.reference @@ -0,0 +1,21 @@ +1006 +1007 +1008 +1009 +101 +1010 +1011 +1012 +1013 +1014 +--- +100 100 +101 101 +102 102 +103 103 +104 104 +105 105 +106 106 +107 107 +108 108 +109 109 diff --git a/tests/queries/0_stateless/03222_parallel_replicas_min_marks_to_read_overflow.sql b/tests/queries/0_stateless/03222_parallel_replicas_min_marks_to_read_overflow.sql new file mode 100644 index 00000000000..6f486f8f0fe --- /dev/null +++ b/tests/queries/0_stateless/03222_parallel_replicas_min_marks_to_read_overflow.sql @@ -0,0 +1,23 @@ +DROP TABLE IF EXISTS test__fuzz_22 SYNC; + +CREATE TABLE test__fuzz_22 (k Float32, v String) ENGINE = ReplicatedMergeTree('/clickhouse/03222/{database}/test__fuzz_22', 'r1') ORDER BY k SETTINGS index_granularity = 1; + +INSERT INTO test__fuzz_22 SELECT number, toString(number) FROM numbers(10_000); + +SET allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 3, cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost'; + +SELECT v +FROM test__fuzz_22 +ORDER BY v +LIMIT 10, 10 +SETTINGS merge_tree_min_rows_for_concurrent_read = 9223372036854775806; + +SELECT '---'; + +SELECT k, v +FROM test__fuzz_22 +ORDER BY k +LIMIT 100, 10 +SETTINGS optimize_read_in_order=1, merge_tree_min_rows_for_concurrent_read = 9223372036854775806; + +DROP TABLE test__fuzz_22 SYNC; diff --git a/tests/queries/0_stateless/03223_analyzer_with_cube_fuzz.reference b/tests/queries/0_stateless/03223_analyzer_with_cube_fuzz.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03223_analyzer_with_cube_fuzz.sql b/tests/queries/0_stateless/03223_analyzer_with_cube_fuzz.sql new file mode 100644 index 00000000000..f3bccc79b3f --- /dev/null +++ b/tests/queries/0_stateless/03223_analyzer_with_cube_fuzz.sql @@ -0,0 +1,29 @@ +SET enable_analyzer = 1; + +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; + +CREATE TABLE t1 (`a` Int64, `b` Int64) ENGINE = MergeTree ORDER BY a; +CREATE TABLE t2 (`key` Int32, `val` Int64) ENGINE = MergeTree ORDER BY key; +insert into t1 Select number, number from numbers(100000); +insert into t2 Select number, number from numbers(100000); + + +SELECT + 1 * 1000.0001, + (count(1.) = -2147483647) AND (count(a) = 1.1920928955078125e-7) AND (count(val) = 1048577) AND (sum(val) = ((NULL * 1048576) / -9223372036854775807)) AND (sum(a) = ((9223372036854775806 * 10000000000.) / 1048575)) +FROM +( + SELECT + a, + val + FROM t1 + FULL OUTER JOIN t2 ON (t1.a = t2.key) OR (1 * inf) OR (t1.b = t2.key) +) +GROUP BY '65537' + WITH CUBE +FORMAT Null +SETTINGS max_block_size = 100, join_use_nulls = 1, max_execution_time = 1., max_result_rows = 0, max_result_bytes = 0; -- { serverError TIMEOUT_EXCEEDED } + +DROP TABLE t1; +DROP TABLE t2; diff --git a/tests/queries/0_stateless/03223_interval_data_type_comparison.reference b/tests/queries/0_stateless/03223_interval_data_type_comparison.reference new file mode 100644 index 00000000000..e98f792e4b2 --- /dev/null +++ b/tests/queries/0_stateless/03223_interval_data_type_comparison.reference @@ -0,0 +1,99 @@ +Comparing nanoseconds +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +Comparing microseconds +1 +1 +1 +1 +1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +Comparing milliseconds +1 +1 +1 +1 +1 +1 +0 +0 +0 +0 +0 +0 +Comparing seconds +1 +1 +1 +1 +1 +0 +0 +0 +0 +0 +Comparing minutes +1 +1 +1 +1 +0 +0 +0 +0 +Comparing hours +1 +1 +1 +0 +0 +0 +Comparing days +1 +1 +0 +0 +Comparing weeks +1 +0 +Comparing months +1 +1 +1 +0 +0 +0 +Comparing quarters +1 +1 +0 +0 +Comparing years +1 +0 diff --git a/tests/queries/0_stateless/03223_interval_data_type_comparison.sql b/tests/queries/0_stateless/03223_interval_data_type_comparison.sql new file mode 100644 index 00000000000..77b6e2fa3dc --- /dev/null +++ b/tests/queries/0_stateless/03223_interval_data_type_comparison.sql @@ -0,0 +1,142 @@ +SELECT('Comparing nanoseconds'); +SELECT INTERVAL 500 NANOSECOND > INTERVAL 300 NANOSECOND; +SELECT INTERVAL 1000 NANOSECOND < INTERVAL 1500 NANOSECOND; +SELECT INTERVAL 2000 NANOSECOND = INTERVAL 2000 NANOSECOND; +SELECT INTERVAL 1000 NANOSECOND >= INTERVAL 1 MICROSECOND; +SELECT INTERVAL 1000001 NANOSECOND > INTERVAL 1 MILLISECOND; +SELECT INTERVAL 2000000001 NANOSECOND > INTERVAL 2 SECOND; +SELECT INTERVAL 60000000000 NANOSECOND = INTERVAL 1 MINUTE; +SELECT INTERVAL 7199999999999 NANOSECOND < INTERVAL 2 HOUR; +SELECT INTERVAL 1 NANOSECOND < INTERVAL 2 DAY; +SELECT INTERVAL 5 NANOSECOND < INTERVAL 1 WEEK; + +SELECT INTERVAL 500 NANOSECOND < INTERVAL 300 NANOSECOND; +SELECT INTERVAL 1000 NANOSECOND > INTERVAL 1500 NANOSECOND; +SELECT INTERVAL 2000 NANOSECOND != INTERVAL 2000 NANOSECOND; +SELECT INTERVAL 1000 NANOSECOND < INTERVAL 1 MICROSECOND; +SELECT INTERVAL 1000001 NANOSECOND < INTERVAL 1 MILLISECOND; +SELECT INTERVAL 2000000001 NANOSECOND < INTERVAL 2 SECOND; +SELECT INTERVAL 60000000000 NANOSECOND != INTERVAL 1 MINUTE; +SELECT INTERVAL 7199999999999 NANOSECOND > INTERVAL 2 HOUR; +SELECT INTERVAL 1 NANOSECOND > INTERVAL 2 DAY; +SELECT INTERVAL 5 NANOSECOND > INTERVAL 1 WEEK; + +SELECT INTERVAL 1 NANOSECOND < INTERVAL 2 MONTH; -- { serverError NO_COMMON_TYPE } + +SELECT('Comparing microseconds'); +SELECT INTERVAL 1 MICROSECOND < INTERVAL 999 MICROSECOND; +SELECT INTERVAL 1001 MICROSECOND > INTERVAL 1 MILLISECOND; +SELECT INTERVAL 2000000 MICROSECOND = INTERVAL 2 SECOND; +SELECT INTERVAL 179999999 MICROSECOND < INTERVAL 3 MINUTE; +SELECT INTERVAL 3600000000 MICROSECOND = INTERVAL 1 HOUR; +SELECT INTERVAL 36000000000000 MICROSECOND > INTERVAL 2 DAY; +SELECT INTERVAL 1209600000000 MICROSECOND = INTERVAL 2 WEEK; + +SELECT INTERVAL 1 MICROSECOND > INTERVAL 999 MICROSECOND; +SELECT INTERVAL 1001 MICROSECOND < INTERVAL 1 MILLISECOND; +SELECT INTERVAL 2000000 MICROSECOND != INTERVAL 2 SECOND; +SELECT INTERVAL 179999999 MICROSECOND > INTERVAL 3 MINUTE; +SELECT INTERVAL 3600000000 MICROSECOND != INTERVAL 1 HOUR; +SELECT INTERVAL 36000000000000 MICROSECOND < INTERVAL 2 DAY; +SELECT INTERVAL 1209600000000 MICROSECOND != INTERVAL 2 WEEK; + +SELECT INTERVAL 36000000000000 MICROSECOND < INTERVAL 1 QUARTER; -- { serverError NO_COMMON_TYPE } + +SELECT('Comparing milliseconds'); +SELECT INTERVAL 2000 MILLISECOND > INTERVAL 2 MILLISECOND; +SELECT INTERVAL 2000 MILLISECOND = INTERVAL 2 SECOND; +SELECT INTERVAL 170000 MILLISECOND < INTERVAL 3 MINUTE; +SELECT INTERVAL 144000001 MILLISECOND > INTERVAL 40 HOUR; +SELECT INTERVAL 1728000000 MILLISECOND = INTERVAL 20 DAY; +SELECT INTERVAL 1198599999 MILLISECOND < INTERVAL 2 WEEK; + +SELECT INTERVAL 2000 MILLISECOND < INTERVAL 2 MILLISECOND; +SELECT INTERVAL 2000 MILLISECOND != INTERVAL 2 SECOND; +SELECT INTERVAL 170000 MILLISECOND > INTERVAL 3 MINUTE; +SELECT INTERVAL 144000001 MILLISECOND < INTERVAL 40 HOUR; +SELECT INTERVAL 1728000000 MILLISECOND != INTERVAL 20 DAY; +SELECT INTERVAL 1198599999 MILLISECOND > INTERVAL 2 WEEK; + +SELECT INTERVAL 36000000000000 MILLISECOND < INTERVAL 1 YEAR; -- { serverError NO_COMMON_TYPE } + +SELECT('Comparing seconds'); +SELECT INTERVAL 120 SECOND > INTERVAL 2 SECOND; +SELECT INTERVAL 120 SECOND = INTERVAL 2 MINUTE; +SELECT INTERVAL 1 SECOND < INTERVAL 2 HOUR; +SELECT INTERVAL 86401 SECOND >= INTERVAL 1 DAY; +SELECT INTERVAL 1209600 SECOND = INTERVAL 2 WEEK; + +SELECT INTERVAL 120 SECOND < INTERVAL 2 SECOND; +SELECT INTERVAL 120 SECOND != INTERVAL 2 MINUTE; +SELECT INTERVAL 1 SECOND > INTERVAL 2 HOUR; +SELECT INTERVAL 86401 SECOND < INTERVAL 1 DAY; +SELECT INTERVAL 1209600 SECOND != INTERVAL 2 WEEK; + +SELECT INTERVAL 36000000000000 SECOND < INTERVAL 1 MONTH; -- { serverError NO_COMMON_TYPE } + +SELECT('Comparing minutes'); +SELECT INTERVAL 1 MINUTE < INTERVAL 59 MINUTE; +SELECT INTERVAL 1 MINUTE < INTERVAL 59 HOUR; +SELECT INTERVAL 1440 MINUTE = INTERVAL 1 DAY; +SELECT INTERVAL 30241 MINUTE > INTERVAL 3 WEEK; + +SELECT INTERVAL 1 MINUTE > INTERVAL 59 MINUTE; +SELECT INTERVAL 1 MINUTE > INTERVAL 59 HOUR; +SELECT INTERVAL 1440 MINUTE != INTERVAL 1 DAY; +SELECT INTERVAL 30241 MINUTE < INTERVAL 3 WEEK; + +SELECT INTERVAL 2 MINUTE = INTERVAL 120 QUARTER; -- { serverError NO_COMMON_TYPE } + +SELECT('Comparing hours'); +SELECT INTERVAL 48 HOUR > INTERVAL 2 HOUR; +SELECT INTERVAL 48 HOUR >= INTERVAL 2 DAY; +SELECT INTERVAL 672 HOUR = INTERVAL 4 WEEK; + +SELECT INTERVAL 48 HOUR < INTERVAL 2 HOUR; +SELECT INTERVAL 48 HOUR < INTERVAL 2 DAY; +SELECT INTERVAL 672 HOUR != INTERVAL 4 WEEK; + +SELECT INTERVAL 2 HOUR < INTERVAL 1 YEAR; -- { serverError NO_COMMON_TYPE } + +SELECT('Comparing days'); +SELECT INTERVAL 1 DAY < INTERVAL 23 DAY; +SELECT INTERVAL 25 DAY > INTERVAL 3 WEEK; + +SELECT INTERVAL 1 DAY > INTERVAL 23 DAY; +SELECT INTERVAL 25 DAY < INTERVAL 3 WEEK; + +SELECT INTERVAL 2 DAY = INTERVAL 48 MONTH; -- { serverError NO_COMMON_TYPE } + +SELECT('Comparing weeks'); +SELECT INTERVAL 1 WEEK < INTERVAL 6 WEEK; + +SELECT INTERVAL 1 WEEK > INTERVAL 6 WEEK; + +SELECT INTERVAL 124 WEEK > INTERVAL 8 QUARTER; -- { serverError NO_COMMON_TYPE } + +SELECT('Comparing months'); +SELECT INTERVAL 1 MONTH < INTERVAL 3 MONTH; +SELECT INTERVAL 124 MONTH > INTERVAL 5 QUARTER; +SELECT INTERVAL 36 MONTH = INTERVAL 3 YEAR; + +SELECT INTERVAL 1 MONTH > INTERVAL 3 MONTH; +SELECT INTERVAL 124 MONTH < INTERVAL 5 QUARTER; +SELECT INTERVAL 36 MONTH != INTERVAL 3 YEAR; + +SELECT INTERVAL 6 MONTH = INTERVAL 26 MICROSECOND; -- { serverError NO_COMMON_TYPE } + +SELECT('Comparing quarters'); +SELECT INTERVAL 5 QUARTER > INTERVAL 4 QUARTER; +SELECT INTERVAL 20 QUARTER = INTERVAL 5 YEAR; + +SELECT INTERVAL 5 QUARTER < INTERVAL 4 QUARTER; +SELECT INTERVAL 20 QUARTER != INTERVAL 5 YEAR; + +SELECT INTERVAL 2 QUARTER = INTERVAL 6 NANOSECOND; -- { serverError NO_COMMON_TYPE } + +SELECT('Comparing years'); +SELECT INTERVAL 1 YEAR < INTERVAL 3 YEAR; + +SELECT INTERVAL 1 YEAR > INTERVAL 3 YEAR; + +SELECT INTERVAL 2 YEAR = INTERVAL 8 SECOND; -- { serverError NO_COMMON_TYPE } \ No newline at end of file diff --git a/tests/queries/0_stateless/03223_nested_json_in_shared_data_merges.reference b/tests/queries/0_stateless/03223_nested_json_in_shared_data_merges.reference new file mode 100644 index 00000000000..6c01506e800 --- /dev/null +++ b/tests/queries/0_stateless/03223_nested_json_in_shared_data_merges.reference @@ -0,0 +1,81 @@ +All paths +['b'] +['b'] +['b'] +['b'] +['b'] +['c'] +['c'] +['c'] +['c'] +['c'] +Dynamic paths +['b'] +['b'] +['b'] +['b'] +['b'] +[] +[] +[] +[] +[] +Shared data paths +[] +[] +[] +[] +[] +['c'] +['c'] +['c'] +['c'] +['c'] +All paths +['b'] +['b'] +['b'] +['b'] +['b'] +['c'] +['c'] +['c'] +['c'] +['c'] +['b'] +['b'] +['b'] +['b'] +['b'] +Dynamic paths +['b'] +['b'] +['b'] +['b'] +['b'] +['c'] +['c'] +['c'] +['c'] +['c'] +['b'] +['b'] +['b'] +['b'] +['b'] +Shared data paths +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] diff --git a/tests/queries/0_stateless/03223_nested_json_in_shared_data_merges.sql b/tests/queries/0_stateless/03223_nested_json_in_shared_data_merges.sql new file mode 100644 index 00000000000..311eba37772 --- /dev/null +++ b/tests/queries/0_stateless/03223_nested_json_in_shared_data_merges.sql @@ -0,0 +1,26 @@ +set allow_experimental_json_type = 1; + +drop table if exists test; +create table test (json JSON(max_dynamic_paths=8)) engine=MergeTree order by tuple() settings min_bytes_for_wide_part=1, min_rows_for_wide_part=1; +insert into test select materialize('{"a" : [{"b" : 42}]}')::JSON(max_dynamic_paths=8) from numbers(5); +insert into test select materialize('{"aa1" : 42, "aa2" : 42, "aa3" : 42, "aa4" : 42, "aa5" : 42, "aa6" : 42, "aa7" : 42, "aa8" : 42, "a" : [{"c" : 42}]}') from numbers(5); +optimize table test final; + +select 'All paths'; +select JSONAllPaths(arrayJoin(json.a[])) from test; +select 'Dynamic paths'; +select JSONDynamicPaths(arrayJoin(json.a[])) from test; +select 'Shared data paths'; +select JSONSharedDataPaths(arrayJoin(json.a[])) from test; + +insert into test select materialize('{"a" : [{"b" : 42}]}')::JSON(max_dynamic_paths=8) from numbers(5); +optimize table test final; + +select 'All paths'; +select JSONAllPaths(arrayJoin(json.a[])) from test; +select 'Dynamic paths'; +select JSONDynamicPaths(arrayJoin(json.a[])) from test; +select 'Shared data paths'; +select JSONSharedDataPaths(arrayJoin(json.a[])) from test; + +drop table test; diff --git a/tests/queries/0_stateless/03223_parallel_replicas_read_task_size_overflow_bug.reference b/tests/queries/0_stateless/03223_parallel_replicas_read_task_size_overflow_bug.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03223_parallel_replicas_read_task_size_overflow_bug.sql b/tests/queries/0_stateless/03223_parallel_replicas_read_task_size_overflow_bug.sql new file mode 100644 index 00000000000..984c7fe0db7 --- /dev/null +++ b/tests/queries/0_stateless/03223_parallel_replicas_read_task_size_overflow_bug.sql @@ -0,0 +1,22 @@ +DROP TABLE IF EXISTS test__fuzz_22 SYNC; + +CREATE TABLE test__fuzz_22 (k Float32, v String) ENGINE = MergeTree ORDER BY k SETTINGS index_granularity = 1; + +SYSTEM STOP MERGES test__fuzz_22; + +INSERT INTO test__fuzz_22 SELECT number, toString(number) FROM numbers(1); +INSERT INTO test__fuzz_22 SELECT number, toString(number) FROM numbers(1); +INSERT INTO test__fuzz_22 SELECT number, toString(number) FROM numbers(1); +INSERT INTO test__fuzz_22 SELECT number, toString(number) FROM numbers(1); + +SET allow_experimental_parallel_reading_from_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, max_parallel_replicas = 3, cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost', + merge_tree_min_rows_for_concurrent_read = 9223372036854775806, merge_tree_min_rows_for_concurrent_read_for_remote_filesystem = 9223372036854775806; + + SELECT v + FROM test__fuzz_22 +ORDER BY v + LIMIT 10, 10 +SETTINGS max_threads = 4 + FORMAT Null; -- { serverError BAD_ARGUMENTS } + +DROP TABLE test__fuzz_22 SYNC; diff --git a/tests/queries/0_stateless/03223_system_tables_set_not_ready.reference b/tests/queries/0_stateless/03223_system_tables_set_not_ready.reference new file mode 100644 index 00000000000..e39523ed4f5 --- /dev/null +++ b/tests/queries/0_stateless/03223_system_tables_set_not_ready.reference @@ -0,0 +1,5 @@ +system.distribution_queue 1 +system.rocksdb 1 +system.databases 1 +system.mutations 1 +system.replication_queue 1 diff --git a/tests/queries/0_stateless/03223_system_tables_set_not_ready.sql b/tests/queries/0_stateless/03223_system_tables_set_not_ready.sql new file mode 100644 index 00000000000..907fa47143c --- /dev/null +++ b/tests/queries/0_stateless/03223_system_tables_set_not_ready.sql @@ -0,0 +1,30 @@ +-- Tags: no-fasttest +-- Tag no-fasttest -- due to EmbeddedRocksDB + +drop table if exists null; +drop table if exists dist; +create table null as system.one engine=Null; +create table dist as null engine=Distributed(test_cluster_two_shards, currentDatabase(), 'null', rand()); +insert into dist settings prefer_localhost_replica=0 values (1); +select 'system.distribution_queue', count() from system.distribution_queue where exists(select 1) and database = currentDatabase(); + +drop table if exists rocksdb; +create table rocksdb (key Int) engine=EmbeddedRocksDB() primary key key; +insert into rocksdb values (1); +select 'system.rocksdb', count()>0 from system.rocksdb where exists(select 1) and database = currentDatabase(); + +select 'system.databases', count() from system.databases where exists(select 1) and database = currentDatabase(); + +drop table if exists mt; +create table mt (key Int) engine=MergeTree() order by key; +alter table mt delete where 1; +select 'system.mutations', count() from system.mutations where exists(select 1) and database = currentDatabase(); + +drop table if exists rep1; +drop table if exists rep2; +create table rep1 (key Int) engine=ReplicatedMergeTree('/{database}/rep', '{table}') order by key; +create table rep2 (key Int) engine=ReplicatedMergeTree('/{database}/rep', '{table}') order by key; +system stop fetches rep2; +insert into rep1 values (1); +system sync replica rep2 pull; +select 'system.replication_queue', count()>0 from system.replication_queue where exists(select 1) and database = currentDatabase(); diff --git a/tests/queries/0_stateless/03224_json_merges_new_type_in_shared_data.reference b/tests/queries/0_stateless/03224_json_merges_new_type_in_shared_data.reference new file mode 100644 index 00000000000..9d58b3a35db --- /dev/null +++ b/tests/queries/0_stateless/03224_json_merges_new_type_in_shared_data.reference @@ -0,0 +1,4 @@ +Array(Nullable(Int64)) true +Int64 false +Array(Nullable(Int64)) false +Int64 false diff --git a/tests/queries/0_stateless/03224_json_merges_new_type_in_shared_data.sql b/tests/queries/0_stateless/03224_json_merges_new_type_in_shared_data.sql new file mode 100644 index 00000000000..c96d67c0d47 --- /dev/null +++ b/tests/queries/0_stateless/03224_json_merges_new_type_in_shared_data.sql @@ -0,0 +1,12 @@ +set allow_experimental_json_type = 1; + +drop table if exists test; +create table test (json JSON(max_dynamic_paths=1)) engine=MergeTree order by tuple() settings min_rows_for_wide_part = 1, min_bytes_for_wide_part = 1; +insert into test select '{"b" : 42}' from numbers(5); +insert into test select '{"a" : 42, "b" : [1, 2, 3]}' from numbers(5); +optimize table test final; +select distinct dynamicType(json.b) as type, isDynamicElementInSharedData(json.b) from test order by type; +insert into test select '{"b" : 42}' from numbers(5); +optimize table test final; +select distinct dynamicType(json.b) as type, isDynamicElementInSharedData(json.b) from test order by type; +drop table test; diff --git a/tests/queries/0_stateless/03224_nested_json_merges_new_type_in_shared_data.reference b/tests/queries/0_stateless/03224_nested_json_merges_new_type_in_shared_data.reference new file mode 100644 index 00000000000..b45d9bb97da --- /dev/null +++ b/tests/queries/0_stateless/03224_nested_json_merges_new_type_in_shared_data.reference @@ -0,0 +1,22 @@ +Array(JSON(max_dynamic_types=16, max_dynamic_paths=2)) true +Int64 false +Array(JSON(max_dynamic_types=16, max_dynamic_paths=2)) false +Int64 false +['c'] +['d'] +Array(JSON(max_dynamic_types=16, max_dynamic_paths=2)) false +Int64 false +['c'] +['d'] +Int64 true +None false +Int64 true +None false +Array(JSON(max_dynamic_types=16, max_dynamic_paths=2)) false +Int64 false +['c'] +['d'] +Int64 false +None false +Int64 false +None false diff --git a/tests/queries/0_stateless/03224_nested_json_merges_new_type_in_shared_data.sql b/tests/queries/0_stateless/03224_nested_json_merges_new_type_in_shared_data.sql new file mode 100644 index 00000000000..b22b8b4fb75 --- /dev/null +++ b/tests/queries/0_stateless/03224_nested_json_merges_new_type_in_shared_data.sql @@ -0,0 +1,25 @@ +set allow_experimental_json_type = 1; + +drop table if exists test; +create table test (json JSON(max_dynamic_paths=8)) engine=MergeTree order by tuple() settings min_rows_for_wide_part = 1, min_bytes_for_wide_part = 1; +insert into test select materialize('{"a" : 42}')::JSON(max_dynamic_paths=8) from numbers(5); +insert into test select materialize('{"a1" : 42, "a2" : 42, "a3" : 42, "a4" : 42, "a5" : 42, "a6" : 42, "a7" : 42, "a8" : 42, "a" : [{"c" : 42}]}')::JSON(max_dynamic_paths=8) from numbers(5); +optimize table test final; +select distinct dynamicType(json.a) as type, isDynamicElementInSharedData(json.a) from test order by type; +insert into test select materialize('{"a1" : 42, "a2" : 42, "a3" : 42, "a4" : 42, "a5" : 42, "a6" : 42, "a7" : 42, "a8" : 42, "a" : [{"d" : 42}]}')::JSON(max_dynamic_paths=8) from numbers(5); +optimize table test final; +select distinct dynamicType(json.a) as type, isDynamicElementInSharedData(json.a) from test order by type; +select distinct JSONSharedDataPaths(arrayJoin(json.a[])) as path from test order by path; +insert into test select materialize('{"a" : 42}')::JSON(max_dynamic_paths=8) from numbers(5); +optimize table test final; +select distinct dynamicType(json.a) as type, isDynamicElementInSharedData(json.a) from test order by type; +select distinct JSONDynamicPaths(arrayJoin(json.a[])) as path from test order by path; +select distinct dynamicType(arrayJoin(json.a[].c)) as type, isDynamicElementInSharedData(arrayJoin(json.a[].c)) from test order by type; +select distinct dynamicType(arrayJoin(json.a[].d)) as type, isDynamicElementInSharedData(arrayJoin(json.a[].d)) from test order by type; +insert into test select materialize('{"a" : 42}')::JSON(max_dynamic_paths=8) from numbers(5); +optimize table test final; +select distinct dynamicType(json.a) as type, isDynamicElementInSharedData(json.a) from test order by type; +select distinct JSONDynamicPaths(arrayJoin(json.a[])) as path from test order by path; +select distinct dynamicType(arrayJoin(json.a[].c)) as type, isDynamicElementInSharedData(arrayJoin(json.a[].c)) from test order by type; +select distinct dynamicType(arrayJoin(json.a[].d)) as type, isDynamicElementInSharedData(arrayJoin(json.a[].d)) from test order by type; +drop table test; diff --git a/tests/queries/0_stateless/03224_tuple_element_identifier.reference b/tests/queries/0_stateless/03224_tuple_element_identifier.reference new file mode 100644 index 00000000000..0fc9e7410c1 --- /dev/null +++ b/tests/queries/0_stateless/03224_tuple_element_identifier.reference @@ -0,0 +1,4 @@ +([('wtf')]) [('wtf')] wtf +([('wtf')]) [('wtf')] wtf +Hello +('Hello') Hello Hello Hello diff --git a/tests/queries/0_stateless/03224_tuple_element_identifier.sql b/tests/queries/0_stateless/03224_tuple_element_identifier.sql new file mode 100644 index 00000000000..2a7fb9a97a3 --- /dev/null +++ b/tests/queries/0_stateless/03224_tuple_element_identifier.sql @@ -0,0 +1,13 @@ +SET enable_analyzer = 1; + +SELECT JSONExtract('{"hello":[{"world":"wtf"}]}', 'Tuple(hello Array(Tuple(world String)))') AS x, + x.hello, x.hello[1].world; + +SELECT JSONExtract('{"hello":[{" wow ":"wtf"}]}', 'Tuple(hello Array(Tuple(` wow ` String)))') AS x, + x.hello, x.hello[1].` wow `; + +SELECT JSONExtract('{"hello":[{" wow ":"wtf"}]}', 'Tuple(hello Array(Tuple(` wow ` String)))') AS x, + x.hello, x.hello[1].`wow`; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK } + +SELECT ('Hello' AS world,).world; +SELECT ('Hello' AS world,) AS t, t.world, (t).world, identity(t).world; diff --git a/tests/queries/0_stateless/03225_alter_to_json_not_supported.reference b/tests/queries/0_stateless/03225_alter_to_json_not_supported.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03225_alter_to_json_not_supported.sql b/tests/queries/0_stateless/03225_alter_to_json_not_supported.sql new file mode 100644 index 00000000000..398494d56de --- /dev/null +++ b/tests/queries/0_stateless/03225_alter_to_json_not_supported.sql @@ -0,0 +1,15 @@ +set allow_experimental_json_type = 1; + +drop table if exists test; +create table test (s String) engine=MergeTree order by tuple(); +alter table test modify column s JSON; -- { serverError BAD_ARGUMENTS } +drop table test; + +create table test (s Array(String)) engine=MergeTree order by tuple(); +alter table test modify column s Array(JSON); -- { serverError BAD_ARGUMENTS } +drop table test; + +create table test (s Tuple(String, String)) engine=MergeTree order by tuple(); +alter table test modify column s Tuple(JSON, String); -- { serverError BAD_ARGUMENTS } +drop table test; + diff --git a/tests/queries/0_stateless/03226_alter_update_dynamic_json_not_supported.reference b/tests/queries/0_stateless/03226_alter_update_dynamic_json_not_supported.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03226_alter_update_dynamic_json_not_supported.sql b/tests/queries/0_stateless/03226_alter_update_dynamic_json_not_supported.sql new file mode 100644 index 00000000000..720f8670c83 --- /dev/null +++ b/tests/queries/0_stateless/03226_alter_update_dynamic_json_not_supported.sql @@ -0,0 +1,9 @@ +set allow_experimental_dynamic_type = 1; +set allow_experimental_json_type = 1; + +drop table if exists test; +create table test (d Dynamic, json JSON) engine=MergeTree order by tuple(); +alter table test update d = 42 where 1; -- {serverError CANNOT_UPDATE_COLUMN} +alter table test update json = '{}' where 1; -- {serverError CANNOT_UPDATE_COLUMN} +drop table test; + diff --git a/tests/queries/0_stateless/03227_dynamic_subcolumns_enumerate_streams.reference b/tests/queries/0_stateless/03227_dynamic_subcolumns_enumerate_streams.reference new file mode 100644 index 00000000000..b9d711bf9f0 --- /dev/null +++ b/tests/queries/0_stateless/03227_dynamic_subcolumns_enumerate_streams.reference @@ -0,0 +1,15 @@ +\N +\N +\N +\N +\N +str_0 +str_1 +str_2 +str_3 +str_4 +\N +\N +\N +\N +\N diff --git a/tests/queries/0_stateless/03227_dynamic_subcolumns_enumerate_streams.sql b/tests/queries/0_stateless/03227_dynamic_subcolumns_enumerate_streams.sql new file mode 100644 index 00000000000..e451521faca --- /dev/null +++ b/tests/queries/0_stateless/03227_dynamic_subcolumns_enumerate_streams.sql @@ -0,0 +1,9 @@ +set allow_experimental_json_type=1; +drop table if exists test; +create table test (json JSON) engine=Memory; +insert into test select toJSONString(map('a', 'str_' || number)) from numbers(5); +select json.a.String from test; +select json.a.:String from test; +select json.a.UInt64 from test; +drop table test; + diff --git a/tests/queries/0_stateless/03227_json_invalid_regexp.reference b/tests/queries/0_stateless/03227_json_invalid_regexp.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03227_json_invalid_regexp.sql b/tests/queries/0_stateless/03227_json_invalid_regexp.sql new file mode 100644 index 00000000000..d98e2ade29d --- /dev/null +++ b/tests/queries/0_stateless/03227_json_invalid_regexp.sql @@ -0,0 +1,4 @@ +set allow_experimental_json_type = 1; +create table test (json JSON(SKIP REGEXP '[]')) engine=Memory(); -- {serverError CANNOT_COMPILE_REGEXP} +create table test (json JSON(SKIP REGEXP '+')) engine=Memory(); -- {serverError CANNOT_COMPILE_REGEXP}; + diff --git a/tests/queries/0_stateless/03227_print_pretty_tuples_create_query.reference b/tests/queries/0_stateless/03227_print_pretty_tuples_create_query.reference new file mode 100644 index 00000000000..afaaaaa6119 --- /dev/null +++ b/tests/queries/0_stateless/03227_print_pretty_tuples_create_query.reference @@ -0,0 +1,56 @@ + +SHOW CREATE TABLE: +CREATE TABLE default.test +( + `x` Tuple( + a String, + b Array(Tuple( + c Tuple( + e String), + d String))), + `y` String +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS index_granularity = 8192 +CREATE TABLE default.test +( + `x` Tuple(a String, b Array(Tuple(c Tuple(e String), d String))), + `y` String +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS index_granularity = 8192 + +clickhouse-format: +CREATE TABLE test +( + `x` Tuple( + a String, + b Array(Tuple( + c Tuple( + e String), + d String))), + `y` String +) +ORDER BY tuple() +CREATE TABLE test (`x` Tuple(a String, b Array(Tuple(c Tuple(e String), d String))), `y` String) ORDER BY tuple() + +formatQuery: +CREATE TABLE test +( + `x` Tuple( + a String, + b Array(Tuple( + c Tuple( + e String), + d String))), + `y` String +) +ORDER BY tuple() +CREATE TABLE test +( + `x` Tuple(a String, b Array(Tuple(c Tuple(e String), d String))), + `y` String +) +ORDER BY tuple() diff --git a/tests/queries/0_stateless/03227_print_pretty_tuples_create_query.sh b/tests/queries/0_stateless/03227_print_pretty_tuples_create_query.sh new file mode 100755 index 00000000000..e5614f9f228 --- /dev/null +++ b/tests/queries/0_stateless/03227_print_pretty_tuples_create_query.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-asan, no-msan, no-tsan +# ^ requires S3 + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +echo +echo "SHOW CREATE TABLE:" +${CLICKHOUSE_CLIENT} --output-format Raw --query " + DROP TABLE IF EXISTS test; + CREATE TABLE test (x Tuple(a String, b Array(Tuple(c Tuple(e String), d String))), y String) ORDER BY (); + SET print_pretty_type_names = 1; + SHOW CREATE TABLE test; + SET print_pretty_type_names = 0; + SHOW CREATE TABLE test; + DROP TABLE test; +" + +echo +echo "clickhouse-format:" +${CLICKHOUSE_FORMAT} --query " + CREATE TABLE test (x Tuple(a String, b Array(Tuple(c Tuple(e String), d String))), y String) ORDER BY () +" +${CLICKHOUSE_FORMAT} --oneline --query " + CREATE TABLE test (x Tuple(a String, b Array(Tuple(c Tuple(e String), d String))), y String) ORDER BY () +" + +echo +echo "formatQuery:" +${CLICKHOUSE_CLIENT} --output-format Raw --query " + SELECT formatQuery('CREATE TABLE test (x Tuple(a String, b Array(Tuple(c Tuple(e String), d String))), y String) ORDER BY ()') SETTINGS print_pretty_type_names = 1; + SELECT formatQuery('CREATE TABLE test (x Tuple(a String, b Array(Tuple(c Tuple(e String), d String))), y String) ORDER BY ()') SETTINGS print_pretty_type_names = 0; +" diff --git a/tests/queries/0_stateless/03227_proper_parsing_of_cast_operator.reference b/tests/queries/0_stateless/03227_proper_parsing_of_cast_operator.reference new file mode 100644 index 00000000000..2127d396bb3 --- /dev/null +++ b/tests/queries/0_stateless/03227_proper_parsing_of_cast_operator.reference @@ -0,0 +1,4 @@ +414243 +ABC +A +{"a": \'A\'} diff --git a/tests/queries/0_stateless/03227_proper_parsing_of_cast_operator.sql b/tests/queries/0_stateless/03227_proper_parsing_of_cast_operator.sql new file mode 100644 index 00000000000..0c2e7dc582a --- /dev/null +++ b/tests/queries/0_stateless/03227_proper_parsing_of_cast_operator.sql @@ -0,0 +1,6 @@ +SELECT '414243'::String; +SELECT x'414243'::String; +SELECT b'01000001'::String; +SELECT '{"a": \'\x41\'}'::String; +SELECT '{"a": \'\x4\'}'::String; -- { clientError SYNTAX_ERROR } +SELECT '{"a": \'a\x4\'}'::String; -- { clientError SYNTAX_ERROR } diff --git a/tests/queries/0_stateless/03228_dynamic_subcolumns_from_subquery.reference b/tests/queries/0_stateless/03228_dynamic_subcolumns_from_subquery.reference new file mode 100644 index 00000000000..153ad78f694 --- /dev/null +++ b/tests/queries/0_stateless/03228_dynamic_subcolumns_from_subquery.reference @@ -0,0 +1,4 @@ +str +42 +42 +42 diff --git a/tests/queries/0_stateless/03228_dynamic_subcolumns_from_subquery.sql b/tests/queries/0_stateless/03228_dynamic_subcolumns_from_subquery.sql new file mode 100644 index 00000000000..a10b0cb2809 --- /dev/null +++ b/tests/queries/0_stateless/03228_dynamic_subcolumns_from_subquery.sql @@ -0,0 +1,9 @@ +set allow_experimental_dynamic_type=1; +set allow_experimental_json_type=1; +set allow_experimental_analyzer=1; + +select d.String from (select 'str'::Dynamic as d); +select json.a from (select '{"a" : 42}'::JSON as json); +select json.a from (select '{"a" : 42}'::JSON(a UInt32) as json); +select json.a.:Int64 from (select materialize('{"a" : 42}')::JSON as json); + diff --git a/tests/queries/0_stateless/03228_variant_permutation_issue.reference b/tests/queries/0_stateless/03228_variant_permutation_issue.reference new file mode 100644 index 00000000000..be9cdedaf07 --- /dev/null +++ b/tests/queries/0_stateless/03228_variant_permutation_issue.reference @@ -0,0 +1,8 @@ +2 {"foo2":"bar"} 1 +3 {"foo2":"bar"} 1 +2 {"foo2":"baz"} 2 +3 {"foo2":"bar"} 1 +2 {"foo2":"bar"} 1 +3 {"foo2":"bar"} 1 +2 {"foo2":"baz"} 2 +3 {"foo2":"bar"} 1 diff --git a/tests/queries/0_stateless/03228_variant_permutation_issue.sql b/tests/queries/0_stateless/03228_variant_permutation_issue.sql new file mode 100644 index 00000000000..81eb2ed69af --- /dev/null +++ b/tests/queries/0_stateless/03228_variant_permutation_issue.sql @@ -0,0 +1,33 @@ +SET allow_experimental_json_type = 1; + +DROP TABLE IF EXISTS test_new_json_type; +CREATE TABLE test_new_json_type(id UInt32, data JSON, version UInt64) ENGINE=ReplacingMergeTree(version) ORDER BY id; +INSERT INTO test_new_json_type format JSONEachRow +{"id":1,"data":{"foo1":"bar"},"version":1} +{"id":2,"data":{"foo2":"bar"},"version":1} +{"id":3,"data":{"foo2":"bar"},"version":1} +; + +SELECT * FROM test_new_json_type FINAL WHERE data.foo2 is not null ORDER BY id; + +INSERT INTO test_new_json_type SELECT id, '{"foo2":"baz"}' AS _data, version+1 AS _version FROM test_new_json_type where id=2; + +SELECT * FROM test_new_json_type FINAL WHERE data.foo2 is not null ORDER BY id; + +DROP TABLE test_new_json_type; + +CREATE TABLE test_new_json_type(id Nullable(UInt32), data JSON, version UInt64) ENGINE=ReplacingMergeTree(version) ORDER BY id settings allow_nullable_key=1; +INSERT INTO test_new_json_type format JSONEachRow +{"id":1,"data":{"foo1":"bar"},"version":1} +{"id":2,"data":{"foo2":"bar"},"version":1} +{"id":3,"data":{"foo2":"bar"},"version":1} +; + +SELECT * FROM test_new_json_type FINAL WHERE data.foo2 is not null ORDER BY id; + +INSERT INTO test_new_json_type SELECT id, '{"foo2":"baz"}' AS _data, version+1 AS _version FROM test_new_json_type where id=2; + +SELECT * FROM test_new_json_type FINAL PREWHERE data.foo2 IS NOT NULL WHERE data.foo2 IS NOT NULL ORDER BY id ASC NULLS FIRST; + +DROP TABLE test_new_json_type; + diff --git a/tests/queries/0_stateless/03228_virtual_column_merge_dist.reference b/tests/queries/0_stateless/03228_virtual_column_merge_dist.reference new file mode 100644 index 00000000000..28f00bafdfe --- /dev/null +++ b/tests/queries/0_stateless/03228_virtual_column_merge_dist.reference @@ -0,0 +1,8 @@ +1 t_local_1 +2 t_local_2 +1 t_local_1 +2 t_local_2 +1 1 +2 1 +1 1 +2 1 diff --git a/tests/queries/0_stateless/03228_virtual_column_merge_dist.sql b/tests/queries/0_stateless/03228_virtual_column_merge_dist.sql new file mode 100644 index 00000000000..e58c7f38d3b --- /dev/null +++ b/tests/queries/0_stateless/03228_virtual_column_merge_dist.sql @@ -0,0 +1,27 @@ +-- There is a bug in old analyzer with currentDatabase() and distributed query. +SET enable_analyzer = 1; + +DROP TABLE IF EXISTS t_local_1; +DROP TABLE IF EXISTS t_local_2; +DROP TABLE IF EXISTS t_merge; +DROP TABLE IF EXISTS t_distr; + +CREATE TABLE t_local_1 (a UInt32) ENGINE = MergeTree ORDER BY a; +CREATE TABLE t_local_2 (a UInt32) ENGINE = MergeTree ORDER BY a; + +INSERT INTO t_local_1 VALUES (1); +INSERT INTO t_local_2 VALUES (2); + +CREATE TABLE t_merge AS t_local_1 ENGINE = Merge(currentDatabase(), '^(t_local_1|t_local_2)$'); +CREATE TABLE t_distr AS t_local_1 ENGINE = Distributed('test_shard_localhost', currentDatabase(), t_merge, rand()); + +SELECT a, _table FROM t_merge ORDER BY a; +SELECT a, _table FROM t_distr ORDER BY a; + +SELECT a, _database = currentDatabase() FROM t_merge ORDER BY a; +SELECT a, _database = currentDatabase() FROM t_distr ORDER BY a; + +DROP TABLE IF EXISTS t_local_1; +DROP TABLE IF EXISTS t_local_2; +DROP TABLE IF EXISTS t_merge; +DROP TABLE IF EXISTS t_distr; diff --git a/tests/queries/0_stateless/03229_empty_tuple_in_array.reference b/tests/queries/0_stateless/03229_empty_tuple_in_array.reference new file mode 100644 index 00000000000..6a452c185a8 --- /dev/null +++ b/tests/queries/0_stateless/03229_empty_tuple_in_array.reference @@ -0,0 +1 @@ +() diff --git a/tests/queries/0_stateless/03229_empty_tuple_in_array.sql b/tests/queries/0_stateless/03229_empty_tuple_in_array.sql new file mode 100644 index 00000000000..09ba3595a5a --- /dev/null +++ b/tests/queries/0_stateless/03229_empty_tuple_in_array.sql @@ -0,0 +1 @@ +select [()][0]; diff --git a/tests/queries/0_stateless/03229_json_structure_comparison.reference b/tests/queries/0_stateless/03229_json_structure_comparison.reference new file mode 100644 index 00000000000..c816df4f5c7 --- /dev/null +++ b/tests/queries/0_stateless/03229_json_structure_comparison.reference @@ -0,0 +1,3 @@ +{"foo1":"bar"} {"foo1":"bar"} +{"foo2":"bar"} {"foo2":"bar"} +{"foo2":"bar"} {"foo2":"bar"} diff --git a/tests/queries/0_stateless/03229_json_structure_comparison.sql b/tests/queries/0_stateless/03229_json_structure_comparison.sql new file mode 100644 index 00000000000..16db469325d --- /dev/null +++ b/tests/queries/0_stateless/03229_json_structure_comparison.sql @@ -0,0 +1,22 @@ +SET allow_experimental_json_type=1; + +DROP TABLE IF EXISTS test_new_json_type; + +CREATE TABLE test_new_json_type(id UInt32, data JSON, version UInt64) ENGINE=ReplacingMergeTree(version) ORDER BY id; + +INSERT INTO test_new_json_type format JSONEachRow +{"id":1,"data":{"foo1":"bar"},"version":1} +{"id":2,"data":{"foo2":"bar"},"version":1} +{"id":3,"data":{"foo2":"bar"},"version":1} +; + +SELECT + a.data + , b.data +FROM + test_new_json_type a + JOIN test_new_json_type b + ON a.id = b.id; + +DROP TABLE test_new_json_type; + diff --git a/tests/queries/0_stateless/data_hive/partitioning/a=b/a=b/sample.parquet b/tests/queries/0_stateless/data_hive/partitioning/a=b/a=b/sample.parquet new file mode 100644 index 00000000000..9b6a78cf8cc Binary files /dev/null and b/tests/queries/0_stateless/data_hive/partitioning/a=b/a=b/sample.parquet differ diff --git a/tests/queries/0_stateless/data_hive/partitioning/array=[1,2,3]/float=42.42/sample.parquet b/tests/queries/0_stateless/data_hive/partitioning/array=[1,2,3]/float=42.42/sample.parquet new file mode 100644 index 00000000000..9b6a78cf8cc Binary files /dev/null and b/tests/queries/0_stateless/data_hive/partitioning/array=[1,2,3]/float=42.42/sample.parquet differ diff --git a/tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth1/sample.parquet b/tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth1/sample.parquet new file mode 100644 index 00000000000..9b6a78cf8cc Binary files /dev/null and b/tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth1/sample.parquet differ diff --git a/tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet b/tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet new file mode 100644 index 00000000000..9b6a78cf8cc Binary files /dev/null and b/tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet differ diff --git a/tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet b/tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet new file mode 100644 index 00000000000..9b6a78cf8cc Binary files /dev/null and b/tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet differ diff --git a/tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/sample.parquet b/tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/sample.parquet new file mode 100644 index 00000000000..9b6a78cf8cc Binary files /dev/null and b/tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/sample.parquet differ diff --git a/tests/queries/0_stateless/data_hive/partitioning/identifier=2070/email.csv b/tests/queries/0_stateless/data_hive/partitioning/identifier=2070/email.csv new file mode 100644 index 00000000000..936d995cc64 --- /dev/null +++ b/tests/queries/0_stateless/data_hive/partitioning/identifier=2070/email.csv @@ -0,0 +1,5 @@ +_login_email,_identifier,_first_name,_last_name +laura@example.com,2070,Laura,Grey +craig@example.com,4081,Craig,Johnson +mary@example.com,9346,Mary,Jenkins +jamie@example.com,5079,Jamie,Smith diff --git a/tests/queries/0_stateless/data_hive/partitioning/non_existing_column=Elizabeth/sample.parquet b/tests/queries/0_stateless/data_hive/partitioning/non_existing_column=Elizabeth/sample.parquet new file mode 100644 index 00000000000..9b6a78cf8cc Binary files /dev/null and b/tests/queries/0_stateless/data_hive/partitioning/non_existing_column=Elizabeth/sample.parquet differ diff --git a/tests/queries/0_stateless/data_hive/partitioning/number=42/date=2020-01-01/sample.parquet b/tests/queries/0_stateless/data_hive/partitioning/number=42/date=2020-01-01/sample.parquet new file mode 100644 index 00000000000..9b6a78cf8cc Binary files /dev/null and b/tests/queries/0_stateless/data_hive/partitioning/number=42/date=2020-01-01/sample.parquet differ diff --git a/tests/queries/0_stateless/data_minio/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet b/tests/queries/0_stateless/data_minio/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet new file mode 100644 index 00000000000..9b6a78cf8cc Binary files /dev/null and b/tests/queries/0_stateless/data_minio/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet differ diff --git a/tests/queries/0_stateless/data_minio/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet b/tests/queries/0_stateless/data_minio/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet new file mode 100644 index 00000000000..9b6a78cf8cc Binary files /dev/null and b/tests/queries/0_stateless/data_minio/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet differ diff --git a/tests/queries/0_stateless/data_minio/hive_partitioning/column0=Elizabeth/sample.parquet b/tests/queries/0_stateless/data_minio/hive_partitioning/column0=Elizabeth/sample.parquet new file mode 100644 index 00000000000..9b6a78cf8cc Binary files /dev/null and b/tests/queries/0_stateless/data_minio/hive_partitioning/column0=Elizabeth/sample.parquet differ diff --git a/tests/queries/0_stateless/data_minio/hive_partitioning/non_existing_column=Elizabeth/sample.parquet b/tests/queries/0_stateless/data_minio/hive_partitioning/non_existing_column=Elizabeth/sample.parquet new file mode 100644 index 00000000000..9b6a78cf8cc Binary files /dev/null and b/tests/queries/0_stateless/data_minio/hive_partitioning/non_existing_column=Elizabeth/sample.parquet differ diff --git a/tests/queries/0_stateless/data_parquet/68131.parquet b/tests/queries/0_stateless/data_parquet/68131.parquet new file mode 100644 index 00000000000..169f6152003 Binary files /dev/null and b/tests/queries/0_stateless/data_parquet/68131.parquet differ diff --git a/tests/queries/0_stateless/helpers/pure_http_client.py b/tests/queries/0_stateless/helpers/pure_http_client.py index a31a91e0550..c3c4109ce5b 100644 --- a/tests/queries/0_stateless/helpers/pure_http_client.py +++ b/tests/queries/0_stateless/helpers/pure_http_client.py @@ -19,9 +19,14 @@ class ClickHouseClient: self.host = host def query( - self, query, connection_timeout=500, settings=dict(), binary_result=False + self, + query, + connection_timeout=500, + settings=dict(), + binary_result=False, + with_retries=True, ): - NUMBER_OF_TRIES = 30 + NUMBER_OF_TRIES = 30 if with_retries else 1 DELAY = 10 params = { @@ -40,7 +45,8 @@ class ClickHouseClient: if r.status_code == 200: return r.content if binary_result else r.text else: - print("ATTENTION: try #%d failed" % i) + if with_retries: + print("ATTENTION: try #%d failed" % i) if i != (NUMBER_OF_TRIES - 1): print(query) print(r.text) diff --git a/tests/queries/0_stateless/replication.lib b/tests/queries/0_stateless/replication.lib index 05651531fba..36309cf0331 100755 --- a/tests/queries/0_stateless/replication.lib +++ b/tests/queries/0_stateless/replication.lib @@ -89,7 +89,7 @@ function check_replication_consistency() # Touch all data to check that it's readable (and trigger PartCheckThread if needed) # it's important to disable prefer warmed unmerged parts because # otherwise it can read non-syncrhonized state of replicas - while ! $CLICKHOUSE_CLIENT --prefer_warmed_unmerged_parts_seconds=0 -q "SELECT * FROM merge(currentDatabase(), '$table_name_prefix') FORMAT Null" 2>/dev/null; do + while ! $CLICKHOUSE_CLIENT --prefer_warmed_unmerged_parts_seconds=0 --max_result_rows 0 --max_result_bytes 0 --max_rows_to_read 0 --max_bytes_to_read 0 -q "SELECT * FROM merge(currentDatabase(), '$table_name_prefix') FORMAT Null" 2>/dev/null; do sleep 1; num_tries=$((num_tries+1)) if [ $num_tries -eq 250 ]; then @@ -114,7 +114,8 @@ function check_replication_consistency() # it's important to disable prefer warmed unmerged parts because # otherwise it can read non-syncrhonized state of replicas - res=$($CLICKHOUSE_CLIENT --prefer_warmed_unmerged_parts_seconds=0 -q \ + # also, disable the limit that is set for tests globally + res=$($CLICKHOUSE_CLIENT --prefer_warmed_unmerged_parts_seconds=0 --max_rows_to_read=0 -q \ "SELECT if((countDistinct(data) as c) == 0, 1, c) FROM diff --git a/tests/queries/1_stateful/00067_union_all.sql b/tests/queries/1_stateful/00067_union_all.sql index 2a1d00e975d..9ee14b36b03 100644 --- a/tests/queries/1_stateful/00067_union_all.sql +++ b/tests/queries/1_stateful/00067_union_all.sql @@ -10,4 +10,5 @@ UNION ALL ORDER BY id DESC LIMIT 10 ) -ORDER BY id, event; +ORDER BY id, event +SETTINGS max_rows_to_read = 40_000_000; diff --git a/tests/queries/1_stateful/00088_global_in_one_shard_and_rows_before_limit.sql b/tests/queries/1_stateful/00088_global_in_one_shard_and_rows_before_limit.sql index 52f9c46997f..8f18f3740e4 100644 --- a/tests/queries/1_stateful/00088_global_in_one_shard_and_rows_before_limit.sql +++ b/tests/queries/1_stateful/00088_global_in_one_shard_and_rows_before_limit.sql @@ -1,4 +1,4 @@ -- Tags: shard -SET output_format_write_statistics = 0; +SET output_format_write_statistics = 0, max_rows_to_read = 50_000_000; SELECT EventDate, count() FROM remote('127.0.0.1', test.hits) WHERE UserID GLOBAL IN (SELECT UserID FROM test.hits) GROUP BY EventDate ORDER BY EventDate LIMIT 5 FORMAT JSONCompact; diff --git a/tests/queries/1_stateful/00147_global_in_aggregate_function.sql b/tests/queries/1_stateful/00147_global_in_aggregate_function.sql index 075c01530c6..f0b249e9af4 100644 --- a/tests/queries/1_stateful/00147_global_in_aggregate_function.sql +++ b/tests/queries/1_stateful/00147_global_in_aggregate_function.sql @@ -1,4 +1,5 @@ -- Tags: global +SET max_rows_to_read = 100_000_000; SELECT sum(UserID GLOBAL IN (SELECT UserID FROM remote('127.0.0.{1,2}', test.hits))) FROM remote('127.0.0.{1,2}', test.hits); SELECT sum(UserID GLOBAL IN (SELECT UserID FROM test.hits)) FROM remote('127.0.0.{1,2}', test.hits); diff --git a/tests/queries/1_stateful/00149_quantiles_timing_distributed.sql b/tests/queries/1_stateful/00149_quantiles_timing_distributed.sql index 6f910646fb7..16b565985ea 100644 --- a/tests/queries/1_stateful/00149_quantiles_timing_distributed.sql +++ b/tests/queries/1_stateful/00149_quantiles_timing_distributed.sql @@ -1,4 +1,5 @@ -- Tags: distributed +SET max_rows_to_read = 100_000_000; SELECT sum(cityHash64(*)) FROM (SELECT CounterID, quantileTiming(0.5)(SendTiming), count() FROM remote('127.0.0.{1,2,3,4,5,6,7,8,9,10}', test.hits) WHERE SendTiming != -1 GROUP BY CounterID); SELECT sum(cityHash64(*)) FROM (SELECT CounterID, quantileTiming(0.5)(SendTiming), count() FROM remote('127.0.0.{1,2,3,4,5,6,7,8,9,10}', test.hits) WHERE SendTiming != -1 GROUP BY CounterID) SETTINGS optimize_aggregation_in_order = 1; diff --git a/tests/queries/1_stateful/00157_cache_dictionary.sql b/tests/queries/1_stateful/00157_cache_dictionary.sql index 3621ff82126..f1bee538828 100644 --- a/tests/queries/1_stateful/00157_cache_dictionary.sql +++ b/tests/queries/1_stateful/00157_cache_dictionary.sql @@ -9,7 +9,8 @@ ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS storage_policy = 'default'; -INSERT INTO test.hits_1m SELECT * FROM test.hits LIMIT 1000000; +INSERT INTO test.hits_1m SELECT * FROM test.hits LIMIT 1000000 + SETTINGS min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0, max_block_size = 8192, max_insert_threads = 1, max_threads = 1; CREATE DATABASE IF NOT EXISTS db_dict; DROP DICTIONARY IF EXISTS db_dict.cache_hits; diff --git a/tests/queries/1_stateful/00158_cache_dictionary_has.sql b/tests/queries/1_stateful/00158_cache_dictionary_has.sql index 32c109417de..631a7751550 100644 --- a/tests/queries/1_stateful/00158_cache_dictionary_has.sql +++ b/tests/queries/1_stateful/00158_cache_dictionary_has.sql @@ -10,6 +10,8 @@ SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'hits' PA LIFETIME(MIN 300 MAX 600) LAYOUT(CACHE(SIZE_IN_CELLS 100 QUERY_WAIT_TIMEOUT_MILLISECONDS 600000)); +SET timeout_before_checking_execution_speed = 300; + SELECT sum(flag) FROM (SELECT dictHas('db_dict.cache_hits', toUInt64(WatchID)) as flag FROM test.hits PREWHERE WatchID % 1400 == 0 LIMIT 100); SELECT count() from test.hits PREWHERE WatchID % 1400 == 0; @@ -20,4 +22,4 @@ SELECT sum(flag) FROM (SELECT dictHas('db_dict.cache_hits', toUInt64(WatchID)) a SELECT count() from test.hits PREWHERE WatchID % 5 == 0; DROP DICTIONARY IF EXISTS db_dict.cache_hits; -DROP DATABASE IF EXISTS db_dict; +DROP DATABASE IF EXISTS db_dict; diff --git a/tests/queries/1_stateful/00167_read_bytes_from_fs.sql b/tests/queries/1_stateful/00167_read_bytes_from_fs.sql index 7b3f50f8141..184a8edcbcb 100644 --- a/tests/queries/1_stateful/00167_read_bytes_from_fs.sql +++ b/tests/queries/1_stateful/00167_read_bytes_from_fs.sql @@ -1,5 +1,6 @@ -- Tags: no-random-settings +SET max_memory_usage = '10G'; SELECT sum(cityHash64(*)) FROM test.hits SETTINGS max_threads=40; -- We had a bug which lead to additional compressed data read. test.hits compressed size is about 1.2Gb, but we read more then 3Gb. diff --git a/tests/queries/1_stateful/00171_grouping_aggregated_transform_bug.sql b/tests/queries/1_stateful/00171_grouping_aggregated_transform_bug.sql index 7068780a1b1..b3e4d749328 100644 --- a/tests/queries/1_stateful/00171_grouping_aggregated_transform_bug.sql +++ b/tests/queries/1_stateful/00171_grouping_aggregated_transform_bug.sql @@ -1,4 +1,5 @@ -- Tags: distributed +SET max_rows_to_read = '100M'; SELECT sum(cityHash64(*)) FROM (SELECT CounterID, quantileTiming(0.5)(SendTiming), count() FROM remote('127.0.0.{1,2,3,4,5,6,7,8,9,10}', test.hits) WHERE SendTiming != -1 GROUP BY CounterID) SETTINGS max_block_size = 63169; SELECT sum(cityHash64(*)) FROM (SELECT CounterID, quantileTiming(0.5)(SendTiming), count() FROM remote('127.0.0.{1,2,3,4,5,6,7,8,9,10}', test.hits) WHERE SendTiming != -1 GROUP BY CounterID) SETTINGS optimize_aggregation_in_order = 1, max_block_size = 63169; diff --git a/tests/queries/1_stateful/00182_simple_squashing_transform_bug.sql b/tests/queries/1_stateful/00182_simple_squashing_transform_bug.sql index e73de4b33fb..26e112cff04 100644 --- a/tests/queries/1_stateful/00182_simple_squashing_transform_bug.sql +++ b/tests/queries/1_stateful/00182_simple_squashing_transform_bug.sql @@ -1,6 +1,7 @@ -- Tags: global set allow_prefetched_read_pool_for_remote_filesystem=0, merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=0, max_threads=2, max_block_size=65387; +set max_rows_to_read = '100M'; SELECT sum(UserID GLOBAL IN (SELECT UserID FROM remote('127.0.0.{1,2}', test.hits))) FROM remote('127.0.0.{1,2}', test.hits); SELECT sum(UserID GLOBAL IN (SELECT UserID FROM test.hits)) FROM remote('127.0.0.{1,2}', test.hits); diff --git a/tests/tsan_ignorelist.txt b/tests/tsan_ignorelist.txt index 96bf6e4251f..2a31fc9bc15 100644 --- a/tests/tsan_ignorelist.txt +++ b/tests/tsan_ignorelist.txt @@ -5,11 +5,9 @@ # # Caveats for generic entry "fun": # - does not work for __attribute__((__always_inline__)) +# - and may not work for functions that had been inlined # - requires asterisk at the beginning *and* end for static functions # [thread] # https://github.com/ClickHouse/ClickHouse/issues/55629 fun:rd_kafka_broker_set_nodename -# https://github.com/ClickHouse/ClickHouse/issues/60443 -fun:*rd_avg_calc* -fun:*rd_avg_rollover* diff --git a/tests/ubsan_ignorelist.txt b/tests/ubsan_ignorelist.txt index 57d6598afa6..b75819b3f4b 100644 --- a/tests/ubsan_ignorelist.txt +++ b/tests/ubsan_ignorelist.txt @@ -9,6 +9,7 @@ # # Caveats for generic entry "fun": # - does not work for __attribute__((__always_inline__)) +# - and may not work for functions that had been inlined # - requires asterisk at the beginning *and* end for static functions # [undefined] diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 37094a1a088..7f90fc4664e 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1,4 +1,4 @@ -personal_ws-1.1 en 2942 +personal_ws-1.1 en 2942 AArch ACLs ALTERs @@ -385,12 +385,26 @@ IntelliJ IntelliSense InterserverConnection InterserverThreads +IntervalDay +IntervalHour +IntervalMicrosecond +IntervalMillisecond +IntervalMilliseconds +IntervalMinute +IntervalMonth +IntervalNanosecond +IntervalQuarter +IntervalSecond +IntervalWeek +IntervalYear IsPentagon IsResClassIII IsValid JBOD JOINed JOINs +JSONAllPaths +JSONAllPathsWithTypes JSONArrayLength JSONAsObject JSONAsString @@ -405,6 +419,8 @@ JSONCompactStrings JSONCompactStringsEachRow JSONCompactStringsEachRowWithNames JSONCompactStringsEachRowWithNamesAndTypes +JSONDynamicPaths +JSONDynamicPathsWithTypes JSONEachRow JSONEachRowWithProgress JSONExtract @@ -424,6 +440,8 @@ JSONObjectEachRow JSONStrings JSONStringsEachRow JSONStringsEachRowWithProgress +JSONSharedDataPaths +JSONSharedDataPathsWithTypes JSONType JSONs Jaeger @@ -562,6 +580,7 @@ MindsDB Mongodb Monotonicity MsgPack +MultiLineString MultiPolygon Multiline Multiqueries @@ -1674,6 +1693,7 @@ fuzzQuery fuzzer fuzzers gRPC +gaugehistogram gccMurmurHash gcem generateRandom @@ -1700,6 +1720,8 @@ getOSKernelVersion getServerPort getSetting getSizeOfEnumType +getSubcolumn +getTypeSerializationStreams getblockinfo getevents ghcnd @@ -2089,6 +2111,7 @@ multiSearchFirstPositionUTF multibyte multidirectory multiline +multilinestring multiplyDecimal multipolygon multisearchany @@ -2112,11 +2135,14 @@ namenode namepassword nameprofile namequota +namespace namespaces natively nats +ness nestjs netloc +newjson ngram ngramDistance ngramDistanceCaseInsensitive @@ -2184,6 +2210,7 @@ outfile overcommit overcommitted overfitting +overlayUTF overparallelization packetpool packetsize @@ -2366,6 +2393,7 @@ rankCorr rapidjson rawblob readWKTLineString +readWKTMultiLineString readWKTMultiPolygon readWKTPoint readWKTPolygon @@ -2558,6 +2586,7 @@ startsWithUTF startswith statbox stateful +stateset stddev stddevPop stddevPopStable @@ -2689,6 +2718,10 @@ themself threadpool throwIf timeDiff +TimeSeries +timeSeriesData +timeSeriesMetrics +timeSeriesTags timeSlot timeSlots timeZone @@ -2720,6 +2753,17 @@ toISOWeek toISOYear toInt toInterval +toIntervalDay +toIntervalHour +toIntervalMicrosecond +toIntervalMillisecond +toIntervalMinute +toIntervalMonth +toIntervalNanosecond +toIntervalQuarter +toIntervalSecond +toIntervalWeek +toIntervalYear toJSONString toLastDayOfMonth toLastDayOfWeek diff --git a/utils/check-style/check-style b/utils/check-style/check-style index 3c959617d02..46593e85e45 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -467,3 +467,7 @@ find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | grep -vP $EXCLUDE_DIRS | xargs grep -F -i 'ErrorCodes::LOGICAL_ERROR, "Logical error:' && echo "If an exception has LOGICAL_ERROR code, there is no need to include the text 'Logical error' in the exception message, because then the phrase 'Logical error' will be printed twice." + +PATTERN="allow_"; +DIFF=$(comm -3 <(grep -o "\b$PATTERN\w*\b" $ROOT_PATH/src/Core/Settings.h | sort -u) <(grep -o -h "\b$PATTERN\w*\b" $ROOT_PATH/src/Databases/enableAllExperimentalSettings.cpp $ROOT_PATH/utils/check-style/experimental_settings_ignore.txt | sort -u)); +[ -n "$DIFF" ] && echo "$DIFF" && echo "^^ Detected 'allow_*' settings that might need to be included in src/Databases/enableAllExperimentalSettings.cpp" && echo "Alternatively, consider adding an exception to utils/check-style/experimental_settings_ignore.txt" diff --git a/utils/check-style/experimental_settings_ignore.txt b/utils/check-style/experimental_settings_ignore.txt new file mode 100644 index 00000000000..94c46cf562e --- /dev/null +++ b/utils/check-style/experimental_settings_ignore.txt @@ -0,0 +1,48 @@ +allow_aggregate_partitions_independently +allow_archive_path_syntax +allow_asynchronous_read_from_io_pool_for_merge_tree +allow_changing_replica_until_first_data_packet +allow_custom_error_code_in_throwif +allow_ddl +allow_deprecated_database_ordinary +allow_deprecated_snowflake_conversion_functions +allow_distributed_ddl +allow_drop_detached +allow_execute_multiif_columnar +allow_experimental_alter_materialized_view_structure +allow_experimental_analyzer +allow_experimental_annoy_index +allow_experimental_database_atomic +allow_experimental_database_materialized_mysql +allow_experimental_database_materialized_postgresql +allow_experimental_database_replicated +allow_experimental_join_condition +allow_experimental_kafka_offsets_storage_in_keeper +allow_experimental_lightweight_delete +allow_experimental_materialized_postgresql_table +allow_experimental_parallel_reading_from_replicas +allow_experimental_projection_optimization +allow_experimental_query_cache +allow_experimental_query_deduplication +allow_experimental_refreshable_materialized_view +allow_experimental_shared_merge_tree +allow_experimental_statistic +allow_experimental_statistics +allow_experimental_time_series_table +allow_experimental_undrop_table_query +allow_experimental_usearch_index +allow_get_client_http_header +allow_introspection_functions +allow_materialized_view_with_bad_select +allow_named_collection_override_by_default +allow_non_metadata_alters +allow_nonconst_timezone_arguments +allow_nondeterministic_mutations +allow_nondeterministic_optimize_skip_unused_shards +allow_prefetched_read_pool_for_local_filesystem +allow_prefetched_read_pool_for_remote_filesystem +allow_push_predicate_when_subquery_contains_with +allow_settings_after_format_in_insert +allow_statistic_optimize +allow_statistics_optimize +allow_unrestricted_reads_from_keeper diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index e410f31ca5a..d9674ed2366 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,8 +1,14 @@ +v24.8.2.3-lts 2024-08-22 +v24.8.1.2684-lts 2024-08-21 +v24.7.4.51-stable 2024-08-23 +v24.7.3.42-stable 2024-08-08 v24.7.2.13-stable 2024-08-01 v24.7.1.2915-stable 2024-07-30 +v24.6.4.42-stable 2024-08-23 v24.6.3.95-stable 2024-08-06 v24.6.2.17-stable 2024-07-05 v24.6.1.4423-stable 2024-07-01 +v24.5.6.45-stable 2024-08-23 v24.5.5.78-stable 2024-08-05 v24.5.4.49-stable 2024-07-01 v24.5.3.5-stable 2024-06-13 @@ -12,6 +18,9 @@ v24.4.4.113-stable 2024-08-02 v24.4.3.25-stable 2024-06-14 v24.4.2.141-stable 2024-06-07 v24.4.1.2088-stable 2024-05-01 +v24.3.9.5-lts 2024-08-22 +v24.3.8.13-lts 2024-08-20 +v24.3.7.30-lts 2024-08-14 v24.3.6.48-lts 2024-08-02 v24.3.5.46-lts 2024-07-03 v24.3.4.147-lts 2024-06-13